b3h-young123 committed on Feb 11, 2025

Commit

66d99ea

verified ·

1 Parent(s): d864b06

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/__init__.py +0 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/DAT.py +1182 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/HAT.py +1277 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-DAT +201 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-ESRGAN +201 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-HAT +21 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-RealESRGAN +29 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-SCUNet +201 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-SPSR +201 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-SwiftSRGAN +121 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-Swin2SR +201 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-SwinIR +201 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-lama +201 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LaMa.py +694 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/ChannelAttention.py +110 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/LICENSE +201 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/OSA.py +577 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/OSAG.py +60 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/OmniSR.py +143 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/esa.py +294 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/layernorm.py +70 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/pixelshuffle.py +31 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/RRDB.py +296 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SCUNet.py +455 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SPSR.py +383 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SRVGG.py +114 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SwiftSRGAN.py +161 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/Swin2SR.py +1377 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SwinIR.py +1224 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/__init__.py +0 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/block.py +546 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/LICENSE-GFPGAN +351 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/LICENSE-RestoreFormer +351 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/LICENSE-codeformer +35 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/arcface_arch.py +265 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/codeformer.py +790 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/fused_act.py +81 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/gfpgan_bilinear_arch.py +389 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/gfpganv1_arch.py +566 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/gfpganv1_clean_arch.py +370 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/restoreformer_arch.py +776 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/stylegan2_arch.py +865 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/stylegan2_bilinear_arch.py +709 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/stylegan2_clean_arch.py +453 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/upfirdn2d.py +194 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/timm/LICENSE +201 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/timm/drop.py +223 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/timm/helpers.py +31 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/timm/weight_init.py +128 -0
LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/model_loading.py +99 -0

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/__init__.py ADDED Viewed

File without changes

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/DAT.py ADDED Viewed

	@@ -0,0 +1,1182 @@

+# pylint: skip-file
+import math
+import re
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.utils.checkpoint as checkpoint
+from einops import rearrange
+from einops.layers.torch import Rearrange
+from torch import Tensor
+from torch.nn import functional as F
+from .timm.drop import DropPath
+from .timm.weight_init import trunc_normal_
+def img2windows(img, H_sp, W_sp):
+    """
+    Input: Image (B, C, H, W)
+    Output: Window Partition (B', N, C)
+    """
+    B, C, H, W = img.shape
+    img_reshape = img.view(B, C, H // H_sp, H_sp, W // W_sp, W_sp)
+    img_perm = (
+        img_reshape.permute(0, 2, 4, 3, 5, 1).contiguous().reshape(-1, H_sp * W_sp, C)
+    )
+    return img_perm
+def windows2img(img_splits_hw, H_sp, W_sp, H, W):
+    """
+    Input: Window Partition (B', N, C)
+    Output: Image (B, H, W, C)
+    """
+    B = int(img_splits_hw.shape[0] / (H * W / H_sp / W_sp))
+    img = img_splits_hw.view(B, H // H_sp, W // W_sp, H_sp, W_sp, -1)
+    img = img.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
+    return img
+class SpatialGate(nn.Module):
+    """Spatial-Gate.
+    Args:
+        dim (int): Half of input channels.
+    """
+    def __init__(self, dim):
+        super().__init__()
+        self.norm = nn.LayerNorm(dim)
+        self.conv = nn.Conv2d(
+            dim, dim, kernel_size=3, stride=1, padding=1, groups=dim
+        )  # DW Conv
+    def forward(self, x, H, W):
+        # Split
+        x1, x2 = x.chunk(2, dim=-1)
+        B, N, C = x.shape
+        x2 = (
+            self.conv(self.norm(x2).transpose(1, 2).contiguous().view(B, C // 2, H, W))
+            .flatten(2)
+            .transpose(-1, -2)
+            .contiguous()
+        )
+        return x1 * x2
+class SGFN(nn.Module):
+    """Spatial-Gate Feed-Forward Network.
+    Args:
+        in_features (int): Number of input channels.
+        hidden_features (int | None): Number of hidden channels. Default: None
+        out_features (int | None): Number of output channels. Default: None
+        act_layer (nn.Module): Activation layer. Default: nn.GELU
+        drop (float): Dropout rate. Default: 0.0
+    """
+    def __init__(
+        self,
+        in_features,
+        hidden_features=None,
+        out_features=None,
+        act_layer=nn.GELU,
+        drop=0.0,
+    ):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.sg = SpatialGate(hidden_features // 2)
+        self.fc2 = nn.Linear(hidden_features // 2, out_features)
+        self.drop = nn.Dropout(drop)
+    def forward(self, x, H, W):
+        """
+        Input: x: (B, H*W, C), H, W
+        Output: x: (B, H*W, C)
+        """
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.sg(x, H, W)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+class DynamicPosBias(nn.Module):
+    # The implementation builds on Crossformer code https://github.com/cheerss/CrossFormer/blob/main/models/crossformer.py
+    """Dynamic Relative Position Bias.
+    Args:
+        dim (int): Number of input channels.
+        num_heads (int): Number of attention heads.
+        residual (bool):  If True, use residual strage to connect conv.
+    """
+    def __init__(self, dim, num_heads, residual):
+        super().__init__()
+        self.residual = residual
+        self.num_heads = num_heads
+        self.pos_dim = dim // 4
+        self.pos_proj = nn.Linear(2, self.pos_dim)
+        self.pos1 = nn.Sequential(
+            nn.LayerNorm(self.pos_dim),
+            nn.ReLU(inplace=True),
+            nn.Linear(self.pos_dim, self.pos_dim),
+        )
+        self.pos2 = nn.Sequential(
+            nn.LayerNorm(self.pos_dim),
+            nn.ReLU(inplace=True),
+            nn.Linear(self.pos_dim, self.pos_dim),
+        )
+        self.pos3 = nn.Sequential(
+            nn.LayerNorm(self.pos_dim),
+            nn.ReLU(inplace=True),
+            nn.Linear(self.pos_dim, self.num_heads),
+        )
+    def forward(self, biases):
+        if self.residual:
+            pos = self.pos_proj(biases)  # 2Gh-1 * 2Gw-1, heads
+            pos = pos + self.pos1(pos)
+            pos = pos + self.pos2(pos)
+            pos = self.pos3(pos)
+        else:
+            pos = self.pos3(self.pos2(self.pos1(self.pos_proj(biases))))
+        return pos
+class Spatial_Attention(nn.Module):
+    """Spatial Window Self-Attention.
+    It supports rectangle window (containing square window).
+    Args:
+        dim (int): Number of input channels.
+        idx (int): The indentix of window. (0/1)
+        split_size (tuple(int)): Height and Width of spatial window.
+        dim_out (int | None): The dimension of the attention output. Default: None
+        num_heads (int): Number of attention heads. Default: 6
+        attn_drop (float): Dropout ratio of attention weight. Default: 0.0
+        proj_drop (float): Dropout ratio of output. Default: 0.0
+        qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set
+        position_bias (bool): The dynamic relative position bias. Default: True
+    """
+    def __init__(
+        self,
+        dim,
+        idx,
+        split_size=[8, 8],
+        dim_out=None,
+        num_heads=6,
+        attn_drop=0.0,
+        proj_drop=0.0,
+        qk_scale=None,
+        position_bias=True,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.dim_out = dim_out or dim
+        self.split_size = split_size
+        self.num_heads = num_heads
+        self.idx = idx
+        self.position_bias = position_bias
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim**-0.5
+        if idx == 0:
+            H_sp, W_sp = self.split_size[0], self.split_size[1]
+        elif idx == 1:
+            W_sp, H_sp = self.split_size[0], self.split_size[1]
+        else:
+            print("ERROR MODE", idx)
+            exit(0)
+        self.H_sp = H_sp
+        self.W_sp = W_sp
+        if self.position_bias:
+            self.pos = DynamicPosBias(self.dim // 4, self.num_heads, residual=False)
+            # generate mother-set
+            position_bias_h = torch.arange(1 - self.H_sp, self.H_sp)
+            position_bias_w = torch.arange(1 - self.W_sp, self.W_sp)
+            biases = torch.stack(torch.meshgrid([position_bias_h, position_bias_w]))
+            biases = biases.flatten(1).transpose(0, 1).contiguous().float()
+            self.register_buffer("rpe_biases", biases)
+            # get pair-wise relative position index for each token inside the window
+            coords_h = torch.arange(self.H_sp)
+            coords_w = torch.arange(self.W_sp)
+            coords = torch.stack(torch.meshgrid([coords_h, coords_w]))
+            coords_flatten = torch.flatten(coords, 1)
+            relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]
+            relative_coords = relative_coords.permute(1, 2, 0).contiguous()
+            relative_coords[:, :, 0] += self.H_sp - 1
+            relative_coords[:, :, 1] += self.W_sp - 1
+            relative_coords[:, :, 0] *= 2 * self.W_sp - 1
+            relative_position_index = relative_coords.sum(-1)
+            self.register_buffer("relative_position_index", relative_position_index)
+        self.attn_drop = nn.Dropout(attn_drop)
+    def im2win(self, x, H, W):
+        B, N, C = x.shape
+        x = x.transpose(-2, -1).contiguous().view(B, C, H, W)
+        x = img2windows(x, self.H_sp, self.W_sp)
+        x = (
+            x.reshape(-1, self.H_sp * self.W_sp, self.num_heads, C // self.num_heads)
+            .permute(0, 2, 1, 3)
+            .contiguous()
+        )
+        return x
+    def forward(self, qkv, H, W, mask=None):
+        """
+        Input: qkv: (B, 3*L, C), H, W, mask: (B, N, N), N is the window size
+        Output: x (B, H, W, C)
+        """
+        q, k, v = qkv[0], qkv[1], qkv[2]
+        B, L, C = q.shape
+        assert L == H * W, "flatten img_tokens has wrong size"
+        # partition the q,k,v, image to window
+        q = self.im2win(q, H, W)
+        k = self.im2win(k, H, W)
+        v = self.im2win(v, H, W)
+        q = q * self.scale
+        attn = q @ k.transpose(-2, -1)  # B head N C @ B head C N --> B head N N
+        # calculate drpe
+        if self.position_bias:
+            pos = self.pos(self.rpe_biases)
+            # select position bias
+            relative_position_bias = pos[self.relative_position_index.view(-1)].view(
+                self.H_sp * self.W_sp, self.H_sp * self.W_sp, -1
+            )
+            relative_position_bias = relative_position_bias.permute(
+                2, 0, 1
+            ).contiguous()
+            attn = attn + relative_position_bias.unsqueeze(0)
+        N = attn.shape[3]
+        # use mask for shift window
+        if mask is not None:
+            nW = mask.shape[0]
+            attn = attn.view(B, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(
+                0
+            )
+            attn = attn.view(-1, self.num_heads, N, N)
+        attn = nn.functional.softmax(attn, dim=-1, dtype=attn.dtype)
+        attn = self.attn_drop(attn)
+        x = attn @ v
+        x = x.transpose(1, 2).reshape(
+            -1, self.H_sp * self.W_sp, C
+        )  # B head N N @ B head N C
+        # merge the window, window to image
+        x = windows2img(x, self.H_sp, self.W_sp, H, W)  # B H' W' C
+        return x
+class Adaptive_Spatial_Attention(nn.Module):
+    # The implementation builds on CAT code https://github.com/Zhengchen1999/CAT
+    """Adaptive Spatial Self-Attention
+    Runs two window-attention branches (one per channel half, with swapped
+    window orientation) alongside a depth-wise convolution branch on V, and
+    mixes the two through channel/spatial gating maps before the output
+    projection.
+    Args:
+        dim (int): Number of input channels.
+        num_heads (int): Number of attention heads (split evenly over the two branches).
+        reso (int): Resolution (used for both height and width) for which the
+            shifted-window masks are precomputed. Default: 64
+        split_size (tuple(int)): Height and Width of spatial window.
+        shift_size (tuple(int)): Shift size for spatial window.
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: False
+        qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float): Dropout rate. Default: 0.0
+        attn_drop (float): Attention dropout rate. Default: 0.0
+        rg_idx (int): The index of Residual Group (RG)
+        b_idx (int): The index of Block in each RG
+    """
+    def __init__(
+        self,
+        dim,
+        num_heads,
+        reso=64,
+        split_size=[8, 8],
+        shift_size=[1, 2],
+        qkv_bias=False,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        rg_idx=0,
+        b_idx=0,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.num_heads = num_heads
+        self.split_size = split_size
+        self.shift_size = shift_size
+        self.b_idx = b_idx
+        self.rg_idx = rg_idx
+        self.patches_resolution = reso
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        assert (
+            0 <= self.shift_size[0] < self.split_size[0]
+        ), "shift_size must in 0-split_size0"
+        assert (
+            0 <= self.shift_size[1] < self.split_size[1]
+        ), "shift_size must in 0-split_size1"
+        self.branch_num = 2
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(drop)
+        # One attention branch per channel half; idx selects the window orientation.
+        self.attns = nn.ModuleList(
+            [
+                Spatial_Attention(
+                    dim // 2,
+                    idx=i,
+                    split_size=split_size,
+                    num_heads=num_heads // 2,
+                    dim_out=dim // 2,
+                    qk_scale=qk_scale,
+                    attn_drop=attn_drop,
+                    proj_drop=drop,
+                    position_bias=True,
+                )
+                for i in range(self.branch_num)
+            ]
+        )
+        # Same condition as in forward(): only blocks that shift their windows
+        # need masks, so they are precomputed here for a reso x reso input.
+        if (self.rg_idx % 2 == 0 and self.b_idx > 0 and (self.b_idx - 2) % 4 == 0) or (
+            self.rg_idx % 2 != 0 and self.b_idx % 4 == 0
+        ):
+            attn_mask = self.calculate_mask(
+                self.patches_resolution, self.patches_resolution
+            )
+            self.register_buffer("attn_mask_0", attn_mask[0])
+            self.register_buffer("attn_mask_1", attn_mask[1])
+        else:
+            attn_mask = None
+            self.register_buffer("attn_mask_0", None)
+            self.register_buffer("attn_mask_1", None)
+        # Depth-wise convolution branch applied to V.
+        self.dwconv = nn.Sequential(
+            nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, groups=dim),
+            nn.BatchNorm2d(dim),
+            nn.GELU(),
+        )
+        # Produces one gating logit per channel (global average pool -> bottleneck).
+        self.channel_interaction = nn.Sequential(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(dim, dim // 8, kernel_size=1),
+            nn.BatchNorm2d(dim // 8),
+            nn.GELU(),
+            nn.Conv2d(dim // 8, dim, kernel_size=1),
+        )
+        # Produces one gating logit per spatial position (single output channel).
+        self.spatial_interaction = nn.Sequential(
+            nn.Conv2d(dim, dim // 16, kernel_size=1),
+            nn.BatchNorm2d(dim // 16),
+            nn.GELU(),
+            nn.Conv2d(dim // 16, 1, kernel_size=1),
+        )
+    def calculate_mask(self, H, W):
+        """Build the shifted-window attention masks for an H x W token grid.
+        Returns a pair (attn_mask_0, attn_mask_1), one per attention branch;
+        entries are 0.0 where two tokens carry the same region label and
+        -100.0 otherwise. The masks are created on the default device.
+        """
+        # The implementation builds on Swin Transformer code https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py
+        # calculate attention mask for shift window
+        img_mask_0 = torch.zeros((1, H, W, 1))  # 1 H W 1 idx=0
+        img_mask_1 = torch.zeros((1, H, W, 1))  # 1 H W 1 idx=1
+        h_slices_0 = (
+            slice(0, -self.split_size[0]),
+            slice(-self.split_size[0], -self.shift_size[0]),
+            slice(-self.shift_size[0], None),
+        )
+        w_slices_0 = (
+            slice(0, -self.split_size[1]),
+            slice(-self.split_size[1], -self.shift_size[1]),
+            slice(-self.shift_size[1], None),
+        )
+        # Branch 1 uses the transposed window, so split/shift indices are swapped.
+        h_slices_1 = (
+            slice(0, -self.split_size[1]),
+            slice(-self.split_size[1], -self.shift_size[1]),
+            slice(-self.shift_size[1], None),
+        )
+        w_slices_1 = (
+            slice(0, -self.split_size[0]),
+            slice(-self.split_size[0], -self.shift_size[0]),
+            slice(-self.shift_size[0], None),
+        )
+        # Label each of the 3 x 3 regions with a distinct integer.
+        cnt = 0
+        for h in h_slices_0:
+            for w in w_slices_0:
+                img_mask_0[:, h, w, :] = cnt
+                cnt += 1
+        cnt = 0
+        for h in h_slices_1:
+            for w in w_slices_1:
+                img_mask_1[:, h, w, :] = cnt
+                cnt += 1
+        # calculate mask for window-0
+        img_mask_0 = img_mask_0.view(
+            1,
+            H // self.split_size[0],
+            self.split_size[0],
+            W // self.split_size[1],
+            self.split_size[1],
+            1,
+        )
+        img_mask_0 = (
+            img_mask_0.permute(0, 1, 3, 2, 4, 5)
+            .contiguous()
+            .view(-1, self.split_size[0], self.split_size[1], 1)
+        )  # nW, sw[0], sw[1], 1
+        mask_windows_0 = img_mask_0.view(-1, self.split_size[0] * self.split_size[1])
+        # Pairwise label difference: non-zero means the two tokens come from different regions.
+        attn_mask_0 = mask_windows_0.unsqueeze(1) - mask_windows_0.unsqueeze(2)
+        attn_mask_0 = attn_mask_0.masked_fill(
+            attn_mask_0 != 0, float(-100.0)
+        ).masked_fill(attn_mask_0 == 0, float(0.0))
+        # calculate mask for window-1
+        img_mask_1 = img_mask_1.view(
+            1,
+            H // self.split_size[1],
+            self.split_size[1],
+            W // self.split_size[0],
+            self.split_size[0],
+            1,
+        )
+        img_mask_1 = (
+            img_mask_1.permute(0, 1, 3, 2, 4, 5)
+            .contiguous()
+            .view(-1, self.split_size[1], self.split_size[0], 1)
+        )  # nW, sw[1], sw[0], 1
+        mask_windows_1 = img_mask_1.view(-1, self.split_size[1] * self.split_size[0])
+        attn_mask_1 = mask_windows_1.unsqueeze(1) - mask_windows_1.unsqueeze(2)
+        attn_mask_1 = attn_mask_1.masked_fill(
+            attn_mask_1 != 0, float(-100.0)
+        ).masked_fill(attn_mask_1 == 0, float(0.0))
+        return attn_mask_0, attn_mask_1
+    def forward(self, x, H, W):
+        """
+        Input: x: (B, H*W, C), H, W
+        Output: x: (B, H*W, C)
+        """
+        B, L, C = x.shape
+        assert L == H * W, "flatten img_tokens has wrong size"
+        qkv = self.qkv(x).reshape(B, -1, 3, C).permute(2, 0, 1, 3)  # 3, B, HW, C
+        # V without partition
+        v = qkv[2].transpose(-2, -1).contiguous().view(B, C, H, W)
+        # image padding: extend right/bottom up to a multiple of the larger window side
+        max_split_size = max(self.split_size[0], self.split_size[1])
+        pad_l = pad_t = 0
+        pad_r = (max_split_size - W % max_split_size) % max_split_size
+        pad_b = (max_split_size - H % max_split_size) % max_split_size
+        qkv = qkv.reshape(3 * B, H, W, C).permute(0, 3, 1, 2)  # 3B C H W
+        qkv = (
+            F.pad(qkv, (pad_l, pad_r, pad_t, pad_b))
+            .reshape(3, B, C, -1)
+            .transpose(-2, -1)
+        )  # l r t b
+        _H = pad_b + H
+        _W = pad_r + W
+        _L = _H * _W
+        # window-0 and window-1 on split channels [C/2, C/2]; for square windows (e.g., 8x8), window-0 and window-1 can be merged
+        # shift in block: (0, 4, 8, ...), (2, 6, 10, ...), (0, 4, 8, ...), (2, 6, 10, ...), ...
+        if (self.rg_idx % 2 == 0 and self.b_idx > 0 and (self.b_idx - 2) % 4 == 0) or (
+            self.rg_idx % 2 != 0 and self.b_idx % 4 == 0
+        ):
+            qkv = qkv.view(3, B, _H, _W, C)
+            # Cyclically shift each channel half by its own (h, w) offset.
+            qkv_0 = torch.roll(
+                qkv[:, :, :, :, : C // 2],
+                shifts=(-self.shift_size[0], -self.shift_size[1]),
+                dims=(2, 3),
+            )
+            qkv_0 = qkv_0.view(3, B, _L, C // 2)
+            qkv_1 = torch.roll(
+                qkv[:, :, :, :, C // 2 :],
+                shifts=(-self.shift_size[1], -self.shift_size[0]),
+                dims=(2, 3),
+            )
+            qkv_1 = qkv_1.view(3, B, _L, C // 2)
+            # The precomputed masks only fit a reso x reso grid; rebuild them otherwise.
+            if self.patches_resolution != _H or self.patches_resolution != _W:
+                mask_tmp = self.calculate_mask(_H, _W)
+                x1_shift = self.attns[0](qkv_0, _H, _W, mask=mask_tmp[0].to(x.device))
+                x2_shift = self.attns[1](qkv_1, _H, _W, mask=mask_tmp[1].to(x.device))
+            else:
+                x1_shift = self.attns[0](qkv_0, _H, _W, mask=self.attn_mask_0)
+                x2_shift = self.attns[1](qkv_1, _H, _W, mask=self.attn_mask_1)
+            # Undo the cyclic shift.
+            x1 = torch.roll(
+                x1_shift, shifts=(self.shift_size[0], self.shift_size[1]), dims=(1, 2)
+            )
+            x2 = torch.roll(
+                x2_shift, shifts=(self.shift_size[1], self.shift_size[0]), dims=(1, 2)
+            )
+            # Crop the padding away and flatten back to tokens.
+            x1 = x1[:, :H, :W, :].reshape(B, L, C // 2)
+            x2 = x2[:, :H, :W, :].reshape(B, L, C // 2)
+            # attention output
+            attened_x = torch.cat([x1, x2], dim=2)
+        else:
+            # No shift: attend directly on each channel half, then crop the padding.
+            x1 = self.attns[0](qkv[:, :, :, : C // 2], _H, _W)[:, :H, :W, :].reshape(
+                B, L, C // 2
+            )
+            x2 = self.attns[1](qkv[:, :, :, C // 2 :], _H, _W)[:, :H, :W, :].reshape(
+                B, L, C // 2
+            )
+            # attention output
+            attened_x = torch.cat([x1, x2], dim=2)
+        # convolution output
+        conv_x = self.dwconv(v)
+        # Adaptive Interaction Module (AIM)
+        # C-Map (before sigmoid)
+        channel_map = (
+            self.channel_interaction(conv_x)
+            .permute(0, 2, 3, 1)
+            .contiguous()
+            .view(B, 1, C)
+        )
+        # S-Map (before sigmoid)
+        attention_reshape = attened_x.transpose(-2, -1).contiguous().view(B, C, H, W)
+        spatial_map = self.spatial_interaction(attention_reshape)
+        # C-I: the convolution branch gates the attention branch per channel
+        attened_x = attened_x * torch.sigmoid(channel_map)
+        # S-I: the attention branch gates the convolution branch per position
+        conv_x = torch.sigmoid(spatial_map) * conv_x
+        conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(B, L, C)
+        x = attened_x + conv_x
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+class Adaptive_Channel_Attention(nn.Module):
+    # The implementation builds on XCiT code https://github.com/facebookresearch/xcit
+    """Adaptive Channel Self-Attention
+    Attention is computed between channels (per head) rather than between
+    tokens, in parallel with a depth-wise convolution branch on V; the two
+    branches gate each other before the output projection.
+    Args:
+        dim (int): Number of input channels.
+        num_heads (int): Number of attention heads. Default: 8
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: False
+        qk_scale (float | None): Accepted but not used by this module; scaling
+            is done by the learnable per-head temperature. Default: None
+        attn_drop (float): Attention dropout rate. Default: 0.0
+        proj_drop (float): Dropout rate applied after the output projection. Default: 0.0
+    """
+    def __init__(
+        self,
+        dim,
+        num_heads=8,
+        qkv_bias=False,
+        qk_scale=None,
+        attn_drop=0.0,
+        proj_drop=0.0,
+    ):
+        super().__init__()
+        self.num_heads = num_heads
+        # Learnable per-head scale applied to the attention logits.
+        self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1))
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+        # Depth-wise convolution branch applied to V.
+        self.dwconv = nn.Sequential(
+            nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, groups=dim),
+            nn.BatchNorm2d(dim),
+            nn.GELU(),
+        )
+        # Produces one gating logit per channel (global average pool -> bottleneck).
+        self.channel_interaction = nn.Sequential(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(dim, dim // 8, kernel_size=1),
+            nn.BatchNorm2d(dim // 8),
+            nn.GELU(),
+            nn.Conv2d(dim // 8, dim, kernel_size=1),
+        )
+        # Produces one gating logit per spatial position (single output channel).
+        self.spatial_interaction = nn.Sequential(
+            nn.Conv2d(dim, dim // 16, kernel_size=1),
+            nn.BatchNorm2d(dim // 16),
+            nn.GELU(),
+            nn.Conv2d(dim // 16, 1, kernel_size=1),
+        )
+    def forward(self, x, H, W):
+        """
+        Input: x: (B, H*W, C), H, W
+        Output: x: (B, H*W, C)
+        """
+        B, N, C = x.shape
+        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
+        qkv = qkv.permute(2, 0, 3, 1, 4)  # 3, B, heads, N, C // heads
+        q, k, v = qkv[0], qkv[1], qkv[2]
+        # Put channels before tokens so attention runs across channels.
+        q = q.transpose(-2, -1)
+        k = k.transpose(-2, -1)
+        v = v.transpose(-2, -1)
+        # V as a feature map for the convolution branch.
+        v_ = v.reshape(B, C, N).contiguous().view(B, C, H, W)
+        # Normalize along the token axis before taking the dot product.
+        q = torch.nn.functional.normalize(q, dim=-1)
+        k = torch.nn.functional.normalize(k, dim=-1)
+        attn = (q @ k.transpose(-2, -1)) * self.temperature  # B, heads, C // heads, C // heads
+        attn = attn.softmax(dim=-1)
+        attn = self.attn_drop(attn)
+        # attention output
+        attened_x = (attn @ v).permute(0, 3, 1, 2).reshape(B, N, C)
+        # convolution output
+        conv_x = self.dwconv(v_)
+        # Adaptive Interaction Module (AIM)
+        # C-Map (before sigmoid)
+        attention_reshape = attened_x.transpose(-2, -1).contiguous().view(B, C, H, W)
+        channel_map = self.channel_interaction(attention_reshape)
+        # S-Map (before sigmoid)
+        spatial_map = (
+            self.spatial_interaction(conv_x)
+            .permute(0, 2, 3, 1)
+            .contiguous()
+            .view(B, N, 1)
+        )
+        # S-I: the convolution branch gates the attention branch per position
+        attened_x = attened_x * torch.sigmoid(spatial_map)
+        # C-I: the attention branch gates the convolution branch per channel
+        conv_x = conv_x * torch.sigmoid(channel_map)
+        conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(B, N, C)
+        x = attened_x + conv_x
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+class DATB(nn.Module):
+    """Transformer block: pre-norm attention followed by a pre-norm SGFN, each with a residual connection.
+    Blocks with an even b_idx use Adaptive_Spatial_Attention, blocks with an
+    odd b_idx use Adaptive_Channel_Attention.
+    Args:
+        dim (int): Number of input channels.
+        num_heads (int): Number of attention heads.
+        reso (int): Resolution forwarded to the spatial attention. Default: 64
+        split_size (tuple(int)): Height and Width of spatial window.
+        shift_size (tuple(int)): Shift size for spatial window.
+        expansion_factor (float): Ratio of ffn hidden dim to embedding dim. Default: 4.0
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: False
+        qk_scale (float | None): Forwarded to the attention module. Default: None
+        drop (float): Dropout rate forwarded to the attention module. Default: 0.0
+        attn_drop (float): Attention dropout rate. Default: 0.0
+        drop_path (float): Stochastic depth rate. Default: 0.0
+        act_layer (nn.Module): Activation layer. Default: nn.GELU
+        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm
+        rg_idx (int): The index of Residual Group (RG)
+        b_idx (int): The index of Block in each RG
+    """
+    def __init__(
+        self,
+        dim,
+        num_heads,
+        reso=64,
+        split_size=[2, 4],
+        shift_size=[1, 2],
+        expansion_factor=4.0,
+        qkv_bias=False,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        act_layer=nn.GELU,
+        norm_layer=nn.LayerNorm,
+        rg_idx=0,
+        b_idx=0,
+    ):
+        super().__init__()
+        self.norm1 = norm_layer(dim)
+        if b_idx % 2 == 0:
+            # DSTB
+            self.attn = Adaptive_Spatial_Attention(
+                dim,
+                num_heads=num_heads,
+                reso=reso,
+                split_size=split_size,
+                shift_size=shift_size,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop,
+                attn_drop=attn_drop,
+                rg_idx=rg_idx,
+                b_idx=b_idx,
+            )
+        else:
+            # DCTB
+            self.attn = Adaptive_Channel_Attention(
+                dim,
+                num_heads=num_heads,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                attn_drop=attn_drop,
+                proj_drop=drop,
+            )
+        # Stochastic depth is only instantiated when it would actually drop something.
+        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+        ffn_hidden_dim = int(dim * expansion_factor)
+        # NOTE(review): `drop` is not forwarded here, so the SGFN keeps its own default dropout rate.
+        self.ffn = SGFN(
+            in_features=dim,
+            hidden_features=ffn_hidden_dim,
+            out_features=dim,
+            act_layer=act_layer,
+        )
+        self.norm2 = norm_layer(dim)
+    def forward(self, x, x_size):
+        """
+        Input: x: (B, H*W, C), x_size: (H, W)
+        Output: x: (B, H*W, C)
+        """
+        H, W = x_size
+        # Residual attention, then residual feed-forward, each on normalized input.
+        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
+        x = x + self.drop_path(self.ffn(self.norm2(x), H, W))
+        return x
+class ResidualGroup(nn.Module):
+    """ResidualGroup
+    Args:
+        dim (int): Number of input channels.
+        reso (int): Input resolution.
+        num_heads (int): Number of attention heads.
+        split_size (tuple(int)): Height and Width of spatial window.
+        expansion_factor (float): Ratio of ffn hidden dim to embedding dim.
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set. Default: None
+        drop (float): Dropout rate. Default: 0
+        attn_drop(float): Attention dropout rate. Default: 0
+        drop_paths (float | None): Stochastic depth rate.
+        act_layer (nn.Module): Activation layer. Default: nn.GELU
+        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm
+        depth (int): Number of dual aggregation Transformer blocks in residual group.
+        use_chk (bool): Whether to use checkpointing to save memory.
+        resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
+    """
+    def __init__(
+        self,
+        dim,
+        reso,
+        num_heads,
+        split_size=[2, 4],
+        expansion_factor=4.0,
+        qkv_bias=False,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_paths=None,
+        act_layer=nn.GELU,
+        norm_layer=nn.LayerNorm,
+        depth=2,
+        use_chk=False,
+        resi_connection="1conv",
+        rg_idx=0,
+    ):
+        super().__init__()
+        self.use_chk = use_chk
+        self.reso = reso
+        self.blocks = nn.ModuleList(
+            [
+                DATB(
+                    dim=dim,
+                    num_heads=num_heads,
+                    reso=reso,
+                    split_size=split_size,
+                    shift_size=[split_size[0] // 2, split_size[1] // 2],
+                    expansion_factor=expansion_factor,
+                    qkv_bias=qkv_bias,
+                    qk_scale=qk_scale,
+                    drop=drop,
+                    attn_drop=attn_drop,
+                    drop_path=drop_paths[i],
+                    act_layer=act_layer,
+                    norm_layer=norm_layer,
+                    rg_idx=rg_idx,
+                    b_idx=i,
+                )
+                for i in range(depth)
+            ]
+        )
+        if resi_connection == "1conv":
+            self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
+        elif resi_connection == "3conv":
+            self.conv = nn.Sequential(
+                nn.Conv2d(dim, dim // 4, 3, 1, 1),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(dim // 4, dim // 4, 1, 1, 0),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(dim // 4, dim, 3, 1, 1),
+            )
+    def forward(self, x, x_size):
+        """
+        Input: x: (B, H*W, C), x_size: (H, W)
+        Output: x: (B, H*W, C)
+        """
+        H, W = x_size
+        res = x
+        for blk in self.blocks:
+            if self.use_chk:
+                x = checkpoint.checkpoint(blk, x, x_size)
+            else:
+                x = blk(x, x_size)
+        x = rearrange(x, "b (h w) c -> b c h w", h=H, w=W)
+        x = self.conv(x)
+        x = rearrange(x, "b c h w -> b (h w) c")
+        x = res + x
+        return x
+class Upsample(nn.Sequential):
+    """Upsample module.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat):
+        m = []
+        if (scale & (scale - 1)) == 0:  # scale = 2^n
+            for _ in range(int(math.log(scale, 2))):
+                m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
+                m.append(nn.PixelShuffle(2))
+        elif scale == 3:
+            m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
+            m.append(nn.PixelShuffle(3))
+        else:
+            raise ValueError(
+                f"scale {scale} is not supported. " "Supported scales: 2^n and 3."
+            )
+        super(Upsample, self).__init__(*m)
+class UpsampleOneStep(nn.Sequential):
+    """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)
+       Used in lightweight SR to save parameters.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat, num_out_ch, input_resolution=None):
+        self.num_feat = num_feat
+        self.input_resolution = input_resolution
+        m = []
+        m.append(nn.Conv2d(num_feat, (scale**2) * num_out_ch, 3, 1, 1))
+        m.append(nn.PixelShuffle(scale))
+        super(UpsampleOneStep, self).__init__(*m)
+    def flops(self):
+        h, w = self.input_resolution
+        flops = h * w * self.num_feat * 3 * 9
+        return flops
+class DAT(nn.Module):
+    """Dual Aggregation Transformer
+    Args:
+        img_size (int): Input image size. Default: 64
+        in_chans (int): Number of input image channels. Default: 3
+        embed_dim (int): Patch embedding dimension. Default: 180
+        depths (tuple(int)): Depth of each residual group (number of DATB in each RG).
+        split_size (tuple(int)): Height and Width of spatial window.
+        num_heads (tuple(int)): Number of attention heads in different residual groups.
+        expansion_factor (float): Ratio of ffn hidden dim to embedding dim. Default: 4
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set. Default: None
+        drop_rate (float): Dropout rate. Default: 0
+        attn_drop_rate (float): Attention dropout rate. Default: 0
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1
+        act_layer (nn.Module): Activation layer. Default: nn.GELU
+        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm
+        use_chk (bool): Whether to use checkpointing to save memory.
+        upscale: Upscale factor. 2/3/4 for image SR
+        img_range: Image range. 1. or 255.
+        resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
+    """
+    def __init__(self, state_dict):
+        super().__init__()
+        # defaults
+        img_size = 64
+        in_chans = 3
+        embed_dim = 180
+        split_size = [2, 4]
+        depth = [2, 2, 2, 2]
+        num_heads = [2, 2, 2, 2]
+        expansion_factor = 4.0
+        qkv_bias = True
+        qk_scale = None
+        drop_rate = 0.0
+        attn_drop_rate = 0.0
+        drop_path_rate = 0.1
+        act_layer = nn.GELU
+        norm_layer = nn.LayerNorm
+        use_chk = False
+        upscale = 2
+        img_range = 1.0
+        resi_connection = "1conv"
+        upsampler = "pixelshuffle"
+        self.model_arch = "DAT"
+        self.sub_type = "SR"
+        self.state = state_dict
+        state_keys = state_dict.keys()
+        if "conv_before_upsample.0.weight" in state_keys:
+            if "conv_up1.weight" in state_keys:
+                upsampler = "nearest+conv"
+            else:
+                upsampler = "pixelshuffle"
+                supports_fp16 = False
+        elif "upsample.0.weight" in state_keys:
+            upsampler = "pixelshuffledirect"
+        else:
+            upsampler = ""
+        num_feat = (
+            state_dict.get("conv_before_upsample.0.weight", None).shape[1]
+            if state_dict.get("conv_before_upsample.weight", None)
+            else 64
+        )
+        num_in_ch = state_dict["conv_first.weight"].shape[1]
+        in_chans = num_in_ch
+        if "conv_last.weight" in state_keys:
+            num_out_ch = state_dict["conv_last.weight"].shape[0]
+        else:
+            num_out_ch = num_in_ch
+        upscale = 1
+        if upsampler == "nearest+conv":
+            upsample_keys = [
+                x for x in state_keys if "conv_up" in x and "bias" not in x
+            ]
+            for upsample_key in upsample_keys:
+                upscale *= 2
+        elif upsampler == "pixelshuffle":
+            upsample_keys = [
+                x
+                for x in state_keys
+                if "upsample" in x and "conv" not in x and "bias" not in x
+            ]
+            for upsample_key in upsample_keys:
+                shape = state_dict[upsample_key].shape[0]
+                upscale *= math.sqrt(shape // num_feat)
+            upscale = int(upscale)
+        elif upsampler == "pixelshuffledirect":
+            upscale = int(
+                math.sqrt(state_dict["upsample.0.bias"].shape[0] // num_out_ch)
+            )
+        max_layer_num = 0
+        max_block_num = 0
+        for key in state_keys:
+            result = re.match(r"layers.(\d*).blocks.(\d*).norm1.weight", key)
+            if result:
+                layer_num, block_num = result.groups()
+                max_layer_num = max(max_layer_num, int(layer_num))
+                max_block_num = max(max_block_num, int(block_num))
+        depth = [max_block_num + 1 for _ in range(max_layer_num + 1)]
+        if "layers.0.blocks.1.attn.temperature" in state_keys:
+            num_heads_num = state_dict["layers.0.blocks.1.attn.temperature"].shape[0]
+            num_heads = [num_heads_num for _ in range(max_layer_num + 1)]
+        else:
+            num_heads = depth
+        embed_dim = state_dict["conv_first.weight"].shape[0]
+        expansion_factor = float(
+            state_dict["layers.0.blocks.0.ffn.fc1.weight"].shape[0] / embed_dim
+        )
+        # TODO: could actually count the layers, but this should do
+        if "layers.0.conv.4.weight" in state_keys:
+            resi_connection = "3conv"
+        else:
+            resi_connection = "1conv"
+        if "layers.0.blocks.2.attn.attn_mask_0" in state_keys:
+            attn_mask_0_x, attn_mask_0_y, attn_mask_0_z = state_dict[
+                "layers.0.blocks.2.attn.attn_mask_0"
+            ].shape
+            img_size = int(math.sqrt(attn_mask_0_x * attn_mask_0_y))
+        if "layers.0.blocks.0.attn.attns.0.rpe_biases" in state_keys:
+            split_sizes = (
+                state_dict["layers.0.blocks.0.attn.attns.0.rpe_biases"][-1] + 1
+            )
+            split_size = [int(x) for x in split_sizes]
+        self.in_nc = num_in_ch
+        self.out_nc = num_out_ch
+        self.num_feat = num_feat
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.depth = depth
+        self.scale = upscale
+        self.upsampler = upsampler
+        self.img_size = img_size
+        self.img_range = img_range
+        self.expansion_factor = expansion_factor
+        self.resi_connection = resi_connection
+        self.split_size = split_size
+        self.supports_fp16 = False  # Too much weirdness to support this at the moment
+        self.supports_bfp16 = True
+        self.min_size_restriction = 16
+        num_in_ch = in_chans
+        num_out_ch = in_chans
+        num_feat = 64
+        self.img_range = img_range
+        if in_chans == 3:
+            rgb_mean = (0.4488, 0.4371, 0.4040)
+            self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
+        else:
+            self.mean = torch.zeros(1, 1, 1, 1)
+        self.upscale = upscale
+        self.upsampler = upsampler
+        # ------------------------- 1, Shallow Feature Extraction ------------------------- #
+        self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
+        # ------------------------- 2, Deep Feature Extraction ------------------------- #
+        self.num_layers = len(depth)
+        self.use_chk = use_chk
+        self.num_features = (
+            self.embed_dim
+        ) = embed_dim  # num_features for consistency with other models
+        heads = num_heads
+        self.before_RG = nn.Sequential(
+            Rearrange("b c h w -> b (h w) c"), nn.LayerNorm(embed_dim)
+        )
+        curr_dim = embed_dim
+        dpr = [
+            x.item() for x in torch.linspace(0, drop_path_rate, np.sum(depth))
+        ]  # stochastic depth decay rule
+        self.layers = nn.ModuleList()
+        for i in range(self.num_layers):
+            layer = ResidualGroup(
+                dim=embed_dim,
+                num_heads=heads[i],
+                reso=img_size,
+                split_size=split_size,
+                expansion_factor=expansion_factor,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate,
+                attn_drop=attn_drop_rate,
+                drop_paths=dpr[sum(depth[:i]) : sum(depth[: i + 1])],
+                act_layer=act_layer,
+                norm_layer=norm_layer,
+                depth=depth[i],
+                use_chk=use_chk,
+                resi_connection=resi_connection,
+                rg_idx=i,
+            )
+            self.layers.append(layer)
+        self.norm = norm_layer(curr_dim)
+        # build the last conv layer in deep feature extraction
+        if resi_connection == "1conv":
+            self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
+        elif resi_connection == "3conv":
+            # to save parameters and memory
+            self.conv_after_body = nn.Sequential(
+                nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1),
+            )
+        # ------------------------- 3, Reconstruction ------------------------- #
+        if self.upsampler == "pixelshuffle":
+            # for classical SR
+            self.conv_before_upsample = nn.Sequential(
+                nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.upsample = Upsample(upscale, num_feat)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+        elif self.upsampler == "pixelshuffledirect":
+            # for lightweight SR (to save parameters)
+            self.upsample = UpsampleOneStep(
+                upscale, embed_dim, num_out_ch, (img_size, img_size)
+            )
+        self.apply(self._init_weights)
+        self.load_state_dict(state_dict, strict=True)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=0.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(
+            m, (nn.LayerNorm, nn.BatchNorm2d, nn.GroupNorm, nn.InstanceNorm2d)
+        ):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+    def forward_features(self, x):
+        _, _, H, W = x.shape
+        x_size = [H, W]
+        x = self.before_RG(x)
+        for layer in self.layers:
+            x = layer(x, x_size)
+        x = self.norm(x)
+        x = rearrange(x, "b (h w) c -> b c h w", h=H, w=W)
+        return x
+    def forward(self, x):
+        """
+        Input: x: (B, C, H, W)
+        """
+        self.mean = self.mean.type_as(x)
+        x = (x - self.mean) * self.img_range
+        if self.upsampler == "pixelshuffle":
+            # for image SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.conv_before_upsample(x)
+            x = self.conv_last(self.upsample(x))
+        elif self.upsampler == "pixelshuffledirect":
+            # for lightweight SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.upsample(x)
+        x = x / self.img_range + self.mean
+        return x

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/HAT.py ADDED Viewed

	@@ -0,0 +1,1277 @@

+# pylint: skip-file
+# HAT from https://github.com/XPixelGroup/HAT/blob/main/hat/archs/hat_arch.py
+import math
+import re
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from .timm.helpers import to_2tuple
+from .timm.weight_init import trunc_normal_
+def drop_path(x, drop_prob: float = 0.0, training: bool = False):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+    From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py
+    """
+    if drop_prob == 0.0 or not training:
+        return x
+    keep_prob = 1 - drop_prob
+    shape = (x.shape[0],) + (1,) * (
+        x.ndim - 1
+    )  # work with diff dim tensors, not just 2D ConvNets
+    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
+    random_tensor.floor_()  # binarize
+    output = x.div(keep_prob) * random_tensor
+    return output
+class DropPath(nn.Module):
+    """Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
+    From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py
+    """
+    def __init__(self, drop_prob=None):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training)  # type: ignore
+class ChannelAttention(nn.Module):
+    """Channel attention used in RCAN.
+    Args:
+        num_feat (int): Channel number of intermediate features.
+        squeeze_factor (int): Channel squeeze factor. Default: 16.
+    """
+    def __init__(self, num_feat, squeeze_factor=16):
+        super(ChannelAttention, self).__init__()
+        self.attention = nn.Sequential(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(num_feat, num_feat // squeeze_factor, 1, padding=0),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(num_feat // squeeze_factor, num_feat, 1, padding=0),
+            nn.Sigmoid(),
+        )
+    def forward(self, x):
+        y = self.attention(x)
+        return x * y
+class CAB(nn.Module):
+    def __init__(self, num_feat, compress_ratio=3, squeeze_factor=30):
+        super(CAB, self).__init__()
+        self.cab = nn.Sequential(
+            nn.Conv2d(num_feat, num_feat // compress_ratio, 3, 1, 1),
+            nn.GELU(),
+            nn.Conv2d(num_feat // compress_ratio, num_feat, 3, 1, 1),
+            ChannelAttention(num_feat, squeeze_factor),
+        )
+    def forward(self, x):
+        return self.cab(x)
+class Mlp(nn.Module):
+    def __init__(
+        self,
+        in_features,
+        hidden_features=None,
+        out_features=None,
+        act_layer=nn.GELU,
+        drop=0.0,
+    ):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+def window_partition(x, window_size):
+    """
+    Args:
+        x: (b, h, w, c)
+        window_size (int): window size
+    Returns:
+        windows: (num_windows*b, window_size, window_size, c)
+    """
+    b, h, w, c = x.shape
+    x = x.view(b, h // window_size, window_size, w // window_size, window_size, c)
+    windows = (
+        x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, c)
+    )
+    return windows
+def window_reverse(windows, window_size, h, w):
+    """
+    Args:
+        windows: (num_windows*b, window_size, window_size, c)
+        window_size (int): Window size
+        h (int): Height of image
+        w (int): Width of image
+    Returns:
+        x: (b, h, w, c)
+    """
+    b = int(windows.shape[0] / (h * w / window_size / window_size))
+    x = windows.view(
+        b, h // window_size, w // window_size, window_size, window_size, -1
+    )
+    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(b, h, w, -1)
+    return x
+class WindowAttention(nn.Module):
+    r"""Window based multi-head self attention (W-MSA) module with relative position bias.
+    It supports both of shifted and non-shifted window.
+    Args:
+        dim (int): Number of input channels.
+        window_size (tuple[int]): The height and width of the window.
+        num_heads (int): Number of attention heads.
+        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
+        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
+        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
+    """
+    def __init__(
+        self,
+        dim,
+        window_size,
+        num_heads,
+        qkv_bias=True,
+        qk_scale=None,
+        attn_drop=0.0,
+        proj_drop=0.0,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.window_size = window_size  # Wh, Ww
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim**-0.5
+        # define a parameter table of relative position bias
+        self.relative_position_bias_table = nn.Parameter(  # type: ignore
+            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)
+        )  # 2*Wh-1 * 2*Ww-1, nH
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+        trunc_normal_(self.relative_position_bias_table, std=0.02)
+        self.softmax = nn.Softmax(dim=-1)
+    def forward(self, x, rpi, mask=None):
+        """
+        Args:
+            x: input features with shape of (num_windows*b, n, c)
+            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
+        """
+        b_, n, c = x.shape
+        qkv = (
+            self.qkv(x)
+            .reshape(b_, n, 3, self.num_heads, c // self.num_heads)
+            .permute(2, 0, 3, 1, 4)
+        )
+        q, k, v = (
+            qkv[0],
+            qkv[1],
+            qkv[2],
+        )  # make torchscript happy (cannot use tensor as tuple)
+        q = q * self.scale
+        attn = q @ k.transpose(-2, -1)
+        relative_position_bias = self.relative_position_bias_table[rpi.view(-1)].view(
+            self.window_size[0] * self.window_size[1],
+            self.window_size[0] * self.window_size[1],
+            -1,
+        )  # Wh*Ww,Wh*Ww,nH
+        relative_position_bias = relative_position_bias.permute(
+            2, 0, 1
+        ).contiguous()  # nH, Wh*Ww, Wh*Ww
+        attn = attn + relative_position_bias.unsqueeze(0)
+        if mask is not None:
+            nw = mask.shape[0]
+            attn = attn.view(b_ // nw, nw, self.num_heads, n, n) + mask.unsqueeze(
+                1
+            ).unsqueeze(0)
+            attn = attn.view(-1, self.num_heads, n, n)
+            attn = self.softmax(attn)
+        else:
+            attn = self.softmax(attn)
+        attn = self.attn_drop(attn)
+        x = (attn @ v).transpose(1, 2).reshape(b_, n, c)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+class HAB(nn.Module):
+    r"""Hybrid Attention Block.
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        num_heads (int): Number of attention heads.
+        window_size (int): Window size.
+        shift_size (int): Shift size for SW-MSA.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float, optional): Stochastic depth rate. Default: 0.0
+        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        num_heads,
+        window_size=7,
+        shift_size=0,
+        compress_ratio=3,
+        squeeze_factor=30,
+        conv_scale=0.01,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        act_layer=nn.GELU,
+        norm_layer=nn.LayerNorm,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.num_heads = num_heads
+        self.window_size = window_size
+        self.shift_size = shift_size
+        self.mlp_ratio = mlp_ratio
+        if min(self.input_resolution) <= self.window_size:
+            # if window size is larger than input resolution, we don't partition windows
+            self.shift_size = 0
+            self.window_size = min(self.input_resolution)
+        assert (
+            0 <= self.shift_size < self.window_size
+        ), "shift_size must in 0-window_size"
+        self.norm1 = norm_layer(dim)
+        self.attn = WindowAttention(
+            dim,
+            window_size=to_2tuple(self.window_size),
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            qk_scale=qk_scale,
+            attn_drop=attn_drop,
+            proj_drop=drop,
+        )
+        self.conv_scale = conv_scale
+        self.conv_block = CAB(
+            num_feat=dim, compress_ratio=compress_ratio, squeeze_factor=squeeze_factor
+        )
+        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(
+            in_features=dim,
+            hidden_features=mlp_hidden_dim,
+            act_layer=act_layer,
+            drop=drop,
+        )
+    def forward(self, x, x_size, rpi_sa, attn_mask):
+        h, w = x_size
+        b, _, c = x.shape
+        # assert seq_len == h * w, "input feature has wrong size"
+        shortcut = x
+        x = self.norm1(x)
+        x = x.view(b, h, w, c)
+        # Conv_X
+        conv_x = self.conv_block(x.permute(0, 3, 1, 2))
+        conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(b, h * w, c)
+        # cyclic shift
+        if self.shift_size > 0:
+            shifted_x = torch.roll(
+                x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)
+            )
+            attn_mask = attn_mask
+        else:
+            shifted_x = x
+            attn_mask = None
+        # partition windows
+        x_windows = window_partition(
+            shifted_x, self.window_size
+        )  # nw*b, window_size, window_size, c
+        x_windows = x_windows.view(
+            -1, self.window_size * self.window_size, c
+        )  # nw*b, window_size*window_size, c
+        # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size
+        attn_windows = self.attn(x_windows, rpi=rpi_sa, mask=attn_mask)
+        # merge windows
+        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, c)
+        shifted_x = window_reverse(attn_windows, self.window_size, h, w)  # b h' w' c
+        # reverse cyclic shift
+        if self.shift_size > 0:
+            attn_x = torch.roll(
+                shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)
+            )
+        else:
+            attn_x = shifted_x
+        attn_x = attn_x.view(b, h * w, c)
+        # FFN
+        x = shortcut + self.drop_path(attn_x) + conv_x * self.conv_scale
+        x = x + self.drop_path(self.mlp(self.norm2(x)))
+        return x
+class PatchMerging(nn.Module):
+    r"""Patch Merging Layer.
+    Args:
+        input_resolution (tuple[int]): Resolution of input feature.
+        dim (int): Number of input channels.
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+    def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.input_resolution = input_resolution
+        self.dim = dim
+        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
+        self.norm = norm_layer(4 * dim)
+    def forward(self, x):
+        """
+        x: b, h*w, c
+        """
+        h, w = self.input_resolution
+        b, seq_len, c = x.shape
+        assert seq_len == h * w, "input feature has wrong size"
+        assert h % 2 == 0 and w % 2 == 0, f"x size ({h}*{w}) are not even."
+        x = x.view(b, h, w, c)
+        x0 = x[:, 0::2, 0::2, :]  # b h/2 w/2 c
+        x1 = x[:, 1::2, 0::2, :]  # b h/2 w/2 c
+        x2 = x[:, 0::2, 1::2, :]  # b h/2 w/2 c
+        x3 = x[:, 1::2, 1::2, :]  # b h/2 w/2 c
+        x = torch.cat([x0, x1, x2, x3], -1)  # b h/2 w/2 4*c
+        x = x.view(b, -1, 4 * c)  # b h/2*w/2 4*c
+        x = self.norm(x)
+        x = self.reduction(x)
+        return x
+class OCAB(nn.Module):
+    # overlapping cross-attention block
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        window_size,
+        overlap_ratio,
+        num_heads,
+        qkv_bias=True,
+        qk_scale=None,
+        mlp_ratio=2,
+        norm_layer=nn.LayerNorm,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.window_size = window_size
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim**-0.5
+        self.overlap_win_size = int(window_size * overlap_ratio) + window_size
+        self.norm1 = norm_layer(dim)
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        self.unfold = nn.Unfold(
+            kernel_size=(self.overlap_win_size, self.overlap_win_size),
+            stride=window_size,
+            padding=(self.overlap_win_size - window_size) // 2,
+        )
+        # define a parameter table of relative position bias
+        self.relative_position_bias_table = nn.Parameter(  # type: ignore
+            torch.zeros(
+                (window_size + self.overlap_win_size - 1)
+                * (window_size + self.overlap_win_size - 1),
+                num_heads,
+            )
+        )  # 2*Wh-1 * 2*Ww-1, nH
+        trunc_normal_(self.relative_position_bias_table, std=0.02)
+        self.softmax = nn.Softmax(dim=-1)
+        self.proj = nn.Linear(dim, dim)
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(
+            in_features=dim, hidden_features=mlp_hidden_dim, act_layer=nn.GELU
+        )
+    def forward(self, x, x_size, rpi):
+        h, w = x_size
+        b, _, c = x.shape
+        shortcut = x
+        x = self.norm1(x)
+        x = x.view(b, h, w, c)
+        qkv = self.qkv(x).reshape(b, h, w, 3, c).permute(3, 0, 4, 1, 2)  # 3, b, c, h, w
+        q = qkv[0].permute(0, 2, 3, 1)  # b, h, w, c
+        kv = torch.cat((qkv[1], qkv[2]), dim=1)  # b, 2*c, h, w
+        # partition windows
+        q_windows = window_partition(
+            q, self.window_size
+        )  # nw*b, window_size, window_size, c
+        q_windows = q_windows.view(
+            -1, self.window_size * self.window_size, c
+        )  # nw*b, window_size*window_size, c
+        kv_windows = self.unfold(kv)  # b, c*w*w, nw
+        kv_windows = rearrange(
+            kv_windows,
+            "b (nc ch owh oww) nw -> nc (b nw) (owh oww) ch",
+            nc=2,
+            ch=c,
+            owh=self.overlap_win_size,
+            oww=self.overlap_win_size,
+        ).contiguous()  # 2, nw*b, ow*ow, c
+        # Do the above rearrangement without the rearrange function
+        # kv_windows = kv_windows.view(
+        #     2, b, self.overlap_win_size, self.overlap_win_size, c, -1
+        # )
+        # kv_windows = kv_windows.permute(0, 5, 1, 2, 3, 4).contiguous()
+        # kv_windows = kv_windows.view(
+        #     2, -1, self.overlap_win_size * self.overlap_win_size, c
+        # )
+        k_windows, v_windows = kv_windows[0], kv_windows[1]  # nw*b, ow*ow, c
+        b_, nq, _ = q_windows.shape
+        _, n, _ = k_windows.shape
+        d = self.dim // self.num_heads
+        q = q_windows.reshape(b_, nq, self.num_heads, d).permute(
+            0, 2, 1, 3
+        )  # nw*b, nH, nq, d
+        k = k_windows.reshape(b_, n, self.num_heads, d).permute(
+            0, 2, 1, 3
+        )  # nw*b, nH, n, d
+        v = v_windows.reshape(b_, n, self.num_heads, d).permute(
+            0, 2, 1, 3
+        )  # nw*b, nH, n, d
+        q = q * self.scale
+        attn = q @ k.transpose(-2, -1)
+        relative_position_bias = self.relative_position_bias_table[rpi.view(-1)].view(
+            self.window_size * self.window_size,
+            self.overlap_win_size * self.overlap_win_size,
+            -1,
+        )  # ws*ws, wse*wse, nH
+        relative_position_bias = relative_position_bias.permute(
+            2, 0, 1
+        ).contiguous()  # nH, ws*ws, wse*wse
+        attn = attn + relative_position_bias.unsqueeze(0)
+        attn = self.softmax(attn)
+        attn_windows = (attn @ v).transpose(1, 2).reshape(b_, nq, self.dim)
+        # merge windows
+        attn_windows = attn_windows.view(
+            -1, self.window_size, self.window_size, self.dim
+        )
+        x = window_reverse(attn_windows, self.window_size, h, w)  # b h w c
+        x = x.view(b, h * w, self.dim)
+        x = self.proj(x) + shortcut
+        x = x + self.mlp(self.norm2(x))
+        return x
+class AttenBlocks(nn.Module):
+    """A series of attention blocks for one RHAG.
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        depth (int): Number of blocks.
+        num_heads (int): Number of attention heads.
+        window_size (int): Local window size.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+    """
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        depth,
+        num_heads,
+        window_size,
+        compress_ratio,
+        squeeze_factor,
+        conv_scale,
+        overlap_ratio,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        norm_layer=nn.LayerNorm,
+        downsample=None,
+        use_checkpoint=False,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.depth = depth
+        self.use_checkpoint = use_checkpoint
+        # build blocks
+        self.blocks = nn.ModuleList(
+            [
+                HAB(
+                    dim=dim,
+                    input_resolution=input_resolution,
+                    num_heads=num_heads,
+                    window_size=window_size,
+                    shift_size=0 if (i % 2 == 0) else window_size // 2,
+                    compress_ratio=compress_ratio,
+                    squeeze_factor=squeeze_factor,
+                    conv_scale=conv_scale,
+                    mlp_ratio=mlp_ratio,
+                    qkv_bias=qkv_bias,
+                    qk_scale=qk_scale,
+                    drop=drop,
+                    attn_drop=attn_drop,
+                    drop_path=drop_path[i]
+                    if isinstance(drop_path, list)
+                    else drop_path,
+                    norm_layer=norm_layer,
+                )
+                for i in range(depth)
+            ]
+        )
+        # OCAB
+        self.overlap_attn = OCAB(
+            dim=dim,
+            input_resolution=input_resolution,
+            window_size=window_size,
+            overlap_ratio=overlap_ratio,
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            qk_scale=qk_scale,
+            mlp_ratio=mlp_ratio,  # type: ignore
+            norm_layer=norm_layer,
+        )
+        # patch merging layer
+        if downsample is not None:
+            self.downsample = downsample(
+                input_resolution, dim=dim, norm_layer=norm_layer
+            )
+        else:
+            self.downsample = None
+    def forward(self, x, x_size, params):
+        for blk in self.blocks:
+            x = blk(x, x_size, params["rpi_sa"], params["attn_mask"])
+        x = self.overlap_attn(x, x_size, params["rpi_oca"])
+        if self.downsample is not None:
+            x = self.downsample(x)
+        return x
+class RHAG(nn.Module):
+    """Residual Hybrid Attention Group (RHAG).
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        depth (int): Number of blocks.
+        num_heads (int): Number of attention heads.
+        window_size (int): Local window size.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+        img_size: Input image size.
+        patch_size: Patch size.
+        resi_connection: The convolutional block before residual connection.
+    """
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        depth,
+        num_heads,
+        window_size,
+        compress_ratio,
+        squeeze_factor,
+        conv_scale,
+        overlap_ratio,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        norm_layer=nn.LayerNorm,
+        downsample=None,
+        use_checkpoint=False,
+        img_size=224,
+        patch_size=4,
+        resi_connection="1conv",
+    ):
+        super(RHAG, self).__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.residual_group = AttenBlocks(
+            dim=dim,
+            input_resolution=input_resolution,
+            depth=depth,
+            num_heads=num_heads,
+            window_size=window_size,
+            compress_ratio=compress_ratio,
+            squeeze_factor=squeeze_factor,
+            conv_scale=conv_scale,
+            overlap_ratio=overlap_ratio,
+            mlp_ratio=mlp_ratio,
+            qkv_bias=qkv_bias,
+            qk_scale=qk_scale,
+            drop=drop,
+            attn_drop=attn_drop,
+            drop_path=drop_path,
+            norm_layer=norm_layer,
+            downsample=downsample,
+            use_checkpoint=use_checkpoint,
+        )
+        if resi_connection == "1conv":
+            self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
+        elif resi_connection == "identity":
+            self.conv = nn.Identity()
+        self.patch_embed = PatchEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=0,
+            embed_dim=dim,
+            norm_layer=None,
+        )
+        self.patch_unembed = PatchUnEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=0,
+            embed_dim=dim,
+            norm_layer=None,
+        )
+    def forward(self, x, x_size, params):
+        return (
+            self.patch_embed(
+                self.conv(
+                    self.patch_unembed(self.residual_group(x, x_size, params), x_size)
+                )
+            )
+            + x
+        )
+class PatchEmbed(nn.Module):
+    r"""Image to Patch Embedding
+    Args:
+        img_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Module, optional): Normalization layer. Default: None
+    """
+    def __init__(
+        self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None
+    ):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [
+            img_size[0] // patch_size[0],  # type: ignore
+            img_size[1] // patch_size[1],  # type: ignore
+        ]
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+        if norm_layer is not None:
+            self.norm = norm_layer(embed_dim)
+        else:
+            self.norm = None
+    def forward(self, x):
+        x = x.flatten(2).transpose(1, 2)  # b Ph*Pw c
+        if self.norm is not None:
+            x = self.norm(x)
+        return x
+class PatchUnEmbed(nn.Module):
+    r"""Image to Patch Unembedding
+    Args:
+        img_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Module, optional): Normalization layer. Default: None
+    """
+    def __init__(
+        self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None
+    ):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [
+            img_size[0] // patch_size[0],  # type: ignore
+            img_size[1] // patch_size[1],  # type: ignore
+        ]
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+    def forward(self, x, x_size):
+        x = (
+            x.transpose(1, 2)
+            .contiguous()
+            .view(x.shape[0], self.embed_dim, x_size[0], x_size[1])
+        )  # b Ph*Pw c
+        return x
+class Upsample(nn.Sequential):
+    """Upsample module.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat):
+        m = []
+        if (scale & (scale - 1)) == 0:  # scale = 2^n
+            for _ in range(int(math.log(scale, 2))):
+                m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
+                m.append(nn.PixelShuffle(2))
+        elif scale == 3:
+            m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
+            m.append(nn.PixelShuffle(3))
+        else:
+            raise ValueError(
+                f"scale {scale} is not supported. " "Supported scales: 2^n and 3."
+            )
+        super(Upsample, self).__init__(*m)
+class HAT(nn.Module):
+    r"""Hybrid Attention Transformer
+        A PyTorch implementation of : `Activating More Pixels in Image Super-Resolution Transformer`.
+        Some codes are based on SwinIR.
+    Args:
+        img_size (int | tuple(int)): Input image size. Default 64
+        patch_size (int | tuple(int)): Patch size. Default: 1
+        in_chans (int): Number of input image channels. Default: 3
+        embed_dim (int): Patch embedding dimension. Default: 96
+        depths (tuple(int)): Depth of each Swin Transformer layer.
+        num_heads (tuple(int)): Number of attention heads in different layers.
+        window_size (int): Window size. Default: 7
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
+        drop_rate (float): Dropout rate. Default: 0
+        attn_drop_rate (float): Attention dropout rate. Default: 0
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1
+        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
+        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
+        patch_norm (bool): If True, add normalization after patch embedding. Default: True
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
+        upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction
+        img_range: Image range. 1. or 255.
+        upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None
+        resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
+    """
+    # NOTE: the names listed under "Args" above are NOT constructor parameters.
+    # The constructor only takes ``state_dict``; the hyperparameters are either
+    # hard-coded defaults or inferred from the tensors in that state dict.
+    # ``**kwargs`` is accepted but never read inside this class.
+    def __init__(
+        self,
+        state_dict,
+        **kwargs,
+    ):
+        super(HAT, self).__init__()
+        # Defaults
+        # Several of these are overwritten further down with values inferred
+        # from ``state_dict``; the rest are used as-is.
+        img_size = 64
+        patch_size = 1
+        in_chans = 3
+        embed_dim = 96
+        depths = (6, 6, 6, 6)
+        num_heads = (6, 6, 6, 6)
+        window_size = 7
+        compress_ratio = 3
+        squeeze_factor = 30
+        conv_scale = 0.01
+        overlap_ratio = 0.5
+        mlp_ratio = 4.0
+        qkv_bias = True
+        qk_scale = None
+        drop_rate = 0.0
+        attn_drop_rate = 0.0
+        drop_path_rate = 0.1
+        norm_layer = nn.LayerNorm
+        ape = False
+        patch_norm = True
+        use_checkpoint = False
+        upscale = 2
+        img_range = 1.0
+        upsampler = ""
+        resi_connection = "1conv"
+        self.state = state_dict
+        # Descriptive attributes; nothing in this class reads them back.
+        self.model_arch = "HAT"
+        self.sub_type = "SR"
+        self.supports_fp16 = False
+        # NOTE(review): spelled ``support_bf16`` (no "s"), unlike
+        # ``supports_fp16`` above -- confirm which name callers expect.
+        self.support_bf16 = True
+        self.min_size_restriction = 16
+        state_keys = list(state_dict.keys())
+        # Channel counts are read straight off the first/last conv weights
+        # (weight layout: out_channels, in_channels, kH, kW).
+        num_feat = state_dict["conv_last.weight"].shape[1]
+        in_chans = state_dict["conv_first.weight"].shape[1]
+        num_out_ch = state_dict["conv_last.weight"].shape[0]
+        embed_dim = state_dict["conv_first.weight"].shape[0]
+        # Guess the reconstruction head from which keys are present.
+        if "conv_before_upsample.0.weight" in state_keys:
+            if "conv_up1.weight" in state_keys:
+                upsampler = "nearest+conv"
+            else:
+                upsampler = "pixelshuffle"
+                # NOTE(review): this binds a local variable, not
+                # ``self.supports_fp16``; it has no effect (the attribute is
+                # already False).
+                supports_fp16 = False
+        elif "upsample.0.weight" in state_keys:
+            upsampler = "pixelshuffledirect"
+        else:
+            upsampler = ""
+        # Infer the overall upscale factor for the detected head.
+        upscale = 1
+        if upsampler == "nearest+conv":
+            upsample_keys = [
+                x for x in state_keys if "conv_up" in x and "bias" not in x
+            ]
+            # Each matching ``conv_up*`` weight counts as one x2 stage.
+            for upsample_key in upsample_keys:
+                upscale *= 2
+        elif upsampler == "pixelshuffle":
+            upsample_keys = [
+                x
+                for x in state_keys
+                if "upsample" in x and "conv" not in x and "bias" not in x
+            ]
+            # A stage whose conv outputs r*r*num_feat channels upsamples by r.
+            for upsample_key in upsample_keys:
+                shape = self.state[upsample_key].shape[0]
+                upscale *= math.sqrt(shape // num_feat)
+            upscale = int(upscale)
+        elif upsampler == "pixelshuffledirect":
+            upscale = int(
+                math.sqrt(self.state["upsample.0.bias"].shape[0] // num_out_ch)
+            )
+        # Count layers and blocks from the highest indices seen in the keys.
+        max_layer_num = 0
+        max_block_num = 0
+        for key in state_keys:
+            result = re.match(
+                r"layers.(\d*).residual_group.blocks.(\d*).conv_block.cab.0.weight", key
+            )
+            if result:
+                layer_num, block_num = result.groups()
+                max_layer_num = max(max_layer_num, int(layer_num))
+                max_block_num = max(max_block_num, int(block_num))
+        # Every layer is given the same depth (largest block index + 1).
+        depths = [max_block_num + 1 for _ in range(max_layer_num + 1)]
+        if (
+            "layers.0.residual_group.blocks.0.attn.relative_position_bias_table"
+            in state_keys
+        ):
+            # The last dimension of the bias table is the head count.
+            num_heads_num = self.state[
+                "layers.0.residual_group.blocks.0.attn.relative_position_bias_table"
+            ].shape[-1]
+            num_heads = [num_heads_num for _ in range(max_layer_num + 1)]
+        else:
+            # Fallback when the table is absent: reuse the depths as head counts.
+            num_heads = depths
+        mlp_ratio = float(
+            self.state["layers.0.residual_group.blocks.0.mlp.fc1.bias"].shape[0]
+            / embed_dim
+        )
+        # TODO: could actually count the layers, but this should do
+        # NOTE(review): "3conv" can be selected here, but the code below only
+        # creates ``conv_after_body`` for "1conv" and "identity".
+        if "layers.0.conv.4.weight" in state_keys:
+            resi_connection = "3conv"
+        else:
+            resi_connection = "1conv"
+        # The stored index buffer has window_size**2 rows.
+        window_size = int(math.sqrt(self.state["relative_position_index_SA"].shape[0]))
+        # Not sure if this is needed or used at all anywhere in HAT's config
+        if "layers.0.residual_group.blocks.1.attn_mask" in state_keys:
+            img_size = int(
+                math.sqrt(
+                    self.state["layers.0.residual_group.blocks.1.attn_mask"].shape[0]
+                )
+                * window_size
+            )
+        # Publish the resolved configuration as attributes.
+        self.window_size = window_size
+        self.shift_size = window_size // 2
+        self.overlap_ratio = overlap_ratio
+        self.in_nc = in_chans
+        self.out_nc = num_out_ch
+        self.num_feat = num_feat
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.depths = depths
+        self.window_size = window_size
+        self.mlp_ratio = mlp_ratio
+        self.scale = upscale
+        self.upsampler = upsampler
+        self.img_size = img_size
+        self.img_range = img_range
+        self.resi_connection = resi_connection
+        num_in_ch = in_chans
+        # num_out_ch = in_chans
+        # num_feat = 64
+        self.img_range = img_range
+        # Mean subtracted from the input and added back to the output in
+        # ``forward``. Kept as a plain tensor attribute (not a registered
+        # buffer), which is why ``forward`` casts it to match the input.
+        if in_chans == 3:
+            rgb_mean = (0.4488, 0.4371, 0.4040)
+            self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
+        else:
+            self.mean = torch.zeros(1, 1, 1, 1)
+        self.upscale = upscale
+        self.upsampler = upsampler
+        # relative position index
+        # Both helpers read ``self.window_size`` (and ``self.overlap_ratio``),
+        # so they must run after those attributes are set above.
+        relative_position_index_SA = self.calculate_rpi_sa()
+        relative_position_index_OCA = self.calculate_rpi_oca()
+        self.register_buffer("relative_position_index_SA", relative_position_index_SA)
+        self.register_buffer("relative_position_index_OCA", relative_position_index_OCA)
+        # ------------------------- 1, shallow feature extraction ------------------------- #
+        self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
+        # ------------------------- 2, deep feature extraction ------------------------- #
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.ape = ape
+        self.patch_norm = patch_norm
+        self.num_features = embed_dim
+        self.mlp_ratio = mlp_ratio
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=embed_dim,
+            embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None,
+        )
+        num_patches = self.patch_embed.num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+        # merge non-overlapping patches into image
+        self.patch_unembed = PatchUnEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=embed_dim,
+            embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None,
+        )
+        # absolute position embedding
+        if self.ape:
+            self.absolute_pos_embed = nn.Parameter(  # type: ignore[arg-type]
+                torch.zeros(1, num_patches, embed_dim)
+            )
+            trunc_normal_(self.absolute_pos_embed, std=0.02)
+        self.pos_drop = nn.Dropout(p=drop_rate)
+        # stochastic depth
+        # One rate per block across all layers, rising linearly from 0 to
+        # ``drop_path_rate``; each layer receives its own slice below.
+        dpr = [
+            x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
+        ]  # stochastic depth decay rule
+        # build Residual Hybrid Attention Groups (RHAG)
+        self.layers = nn.ModuleList()
+        for i_layer in range(self.num_layers):
+            layer = RHAG(
+                dim=embed_dim,
+                input_resolution=(patches_resolution[0], patches_resolution[1]),
+                depth=depths[i_layer],
+                num_heads=num_heads[i_layer],
+                window_size=window_size,
+                compress_ratio=compress_ratio,
+                squeeze_factor=squeeze_factor,
+                conv_scale=conv_scale,
+                overlap_ratio=overlap_ratio,
+                mlp_ratio=self.mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate,
+                attn_drop=attn_drop_rate,
+                drop_path=dpr[
+                    sum(depths[:i_layer]) : sum(depths[: i_layer + 1])  # type: ignore
+                ],  # no impact on SR results
+                norm_layer=norm_layer,
+                downsample=None,
+                use_checkpoint=use_checkpoint,
+                img_size=img_size,
+                patch_size=patch_size,
+                resi_connection=resi_connection,
+            )
+            self.layers.append(layer)
+        self.norm = norm_layer(self.num_features)
+        # build the last conv layer in deep feature extraction
+        # NOTE(review): no branch for "3conv"; in that case ``conv_after_body``
+        # is never created.
+        if resi_connection == "1conv":
+            self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
+        elif resi_connection == "identity":
+            self.conv_after_body = nn.Identity()
+        # ------------------------- 3, high quality image reconstruction ------------------------- #
+        # NOTE(review): only the "pixelshuffle" head is built; for any other
+        # detected upsampler ``conv_before_upsample``, ``upsample`` and
+        # ``conv_last`` are not created.
+        if self.upsampler == "pixelshuffle":
+            # for classical SR
+            self.conv_before_upsample = nn.Sequential(
+                nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.upsample = Upsample(upscale, num_feat)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+        # Initialize weights first, then overwrite them from the state dict.
+        # ``strict=False`` means missing or unexpected keys are not an error.
+        self.apply(self._init_weights)
+        self.load_state_dict(self.state, strict=False)
+    def _init_weights(self, m):
+        # Linear: truncated-normal weights and zero bias.
+        # LayerNorm: zero bias and unit weight. Other module types are untouched.
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=0.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+    def calculate_rpi_sa(self):
+        # calculate relative position index for SA
+        # Each (query, key) position pair inside a window is mapped to a single
+        # integer built from its 2-D offset.
+        coords_h = torch.arange(self.window_size)
+        coords_w = torch.arange(self.window_size)
+        coords = torch.stack(torch.meshgrid([coords_h, coords_w]))  # 2, Wh, Ww
+        coords_flatten = torch.flatten(coords, 1)  # 2, Wh*Ww
+        relative_coords = (
+            coords_flatten[:, :, None] - coords_flatten[:, None, :]
+        )  # 2, Wh*Ww, Wh*Ww
+        relative_coords = relative_coords.permute(
+            1, 2, 0
+        ).contiguous()  # Wh*Ww, Wh*Ww, 2
+        relative_coords[:, :, 0] += self.window_size - 1  # shift to start from 0
+        relative_coords[:, :, 1] += self.window_size - 1
+        # Scale the first offset by the row length so that first * width +
+        # second gives a distinct flat index per offset pair.
+        relative_coords[:, :, 0] *= 2 * self.window_size - 1
+        relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
+        return relative_position_index
+    def calculate_rpi_oca(self):
+        # calculate relative position index for OCA
+        # Same idea as ``calculate_rpi_sa``, but between positions of the
+        # original window (ws) and of the extended window (wse).
+        window_size_ori = self.window_size
+        window_size_ext = self.window_size + int(self.overlap_ratio * self.window_size)
+        coords_h = torch.arange(window_size_ori)
+        coords_w = torch.arange(window_size_ori)
+        coords_ori = torch.stack(torch.meshgrid([coords_h, coords_w]))  # 2, ws, ws
+        coords_ori_flatten = torch.flatten(coords_ori, 1)  # 2, ws*ws
+        coords_h = torch.arange(window_size_ext)
+        coords_w = torch.arange(window_size_ext)
+        coords_ext = torch.stack(torch.meshgrid([coords_h, coords_w]))  # 2, wse, wse
+        coords_ext_flatten = torch.flatten(coords_ext, 1)  # 2, wse*wse
+        relative_coords = (
+            coords_ext_flatten[:, None, :] - coords_ori_flatten[:, :, None]
+        )  # 2, ws*ws, wse*wse
+        relative_coords = relative_coords.permute(
+            1, 2, 0
+        ).contiguous()  # ws*ws, wse*wse, 2
+        relative_coords[:, :, 0] += (
+            window_size_ori - window_size_ext + 1
+        )  # shift to start from 0
+        relative_coords[:, :, 1] += window_size_ori - window_size_ext + 1
+        relative_coords[:, :, 0] *= window_size_ori + window_size_ext - 1
+        relative_position_index = relative_coords.sum(-1)
+        return relative_position_index
+    def calculate_mask(self, x_size):
+        # calculate attention mask for SW-MSA
+        h, w = x_size
+        img_mask = torch.zeros((1, h, w, 1))  # 1 h w 1
+        # Three bands per axis: everything before the last window, the part of
+        # the last window before the shift, and the final ``shift_size`` rows
+        # or columns.
+        h_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        w_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        # Label each of the 3x3 regions with its own id.
+        # NOTE: the loop variables reuse (and overwrite) the names ``h`` and
+        # ``w`` unpacked from ``x_size`` above.
+        cnt = 0
+        for h in h_slices:
+            for w in w_slices:
+                img_mask[:, h, w, :] = cnt
+                cnt += 1
+        mask_windows = window_partition(
+            img_mask, self.window_size
+        )  # nw, window_size, window_size, 1
+        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
+        # Pairs of positions whose region ids differ get -100.0, pairs with
+        # the same id get 0.0.
+        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
+        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(
+            attn_mask == 0, float(0.0)
+        )
+        return attn_mask
+    @torch.jit.ignore  # type: ignore
+    def no_weight_decay(self):
+        return {"absolute_pos_embed"}
+    @torch.jit.ignore  # type: ignore
+    def no_weight_decay_keywords(self):
+        return {"relative_position_bias_table"}
+    def check_image_size(self, x):
+        # Reflect-pad the right and bottom edges so that height and width
+        # become multiples of ``window_size`` (no padding if they already are).
+        _, _, h, w = x.size()
+        mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
+        mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
+        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect")
+        return x
+    def forward_features(self, x):
+        x_size = (x.shape[2], x.shape[3])
+        # Calculate attention mask and relative position index in advance to speed up inference.
+        # The original code is very time-consuming for large window size.
+        # The mask is built on the default device, then moved to the input's.
+        attn_mask = self.calculate_mask(x_size).to(x.device)
+        params = {
+            "attn_mask": attn_mask,
+            "rpi_sa": self.relative_position_index_SA,
+            "rpi_oca": self.relative_position_index_OCA,
+        }
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x, x_size, params)
+        x = self.norm(x)  # b seq_len c
+        x = self.patch_unembed(x, x_size)
+        return x
+    def forward(self, x):
+        # Remember the unpadded size so the output can be cropped at the end.
+        H, W = x.shape[2:]
+        self.mean = self.mean.type_as(x)
+        x = (x - self.mean) * self.img_range
+        x = self.check_image_size(x)
+        # NOTE(review): for any upsampler other than "pixelshuffle" no network
+        # layer is applied; only the normalization, padding and crop run.
+        if self.upsampler == "pixelshuffle":
+            # for classical SR
+            x = self.conv_first(x)
+            # Global residual around the deep feature extractor.
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.conv_before_upsample(x)
+            x = self.conv_last(self.upsample(x))
+        x = x / self.img_range + self.mean
+        # Crop away the area that corresponds to the padding.
+        return x[:, :, : H * self.upscale, : W * self.upscale]

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-DAT ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-ESRGAN ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-HAT ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2022 Xiangyu Chen
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-RealESRGAN ADDED Viewed

	@@ -0,0 +1,29 @@

+BSD 3-Clause License
+Copyright (c) 2021, Xintao Wang
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-SCUNet ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2022 Kai Zhang (cskaizhang@gmail.com, https://cszn.github.io/). All rights reserved.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-SPSR ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2018-2022 BasicSR Authors
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-SwiftSRGAN ADDED Viewed

	@@ -0,0 +1,121 @@

+Creative Commons Legal Code
+CC0 1.0 Universal
+    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+    LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+    INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+    REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+    PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+    THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+    HEREUNDER.
+Statement of Purpose
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not
+limited to, the following:
+  i. the right to reproduce, adapt, distribute, perform, display,
+     communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+     likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+     subject to the limitations in paragraph 4(a), below;
+  v. rights protecting the extraction, dissemination, use and reuse of data
+     in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+     European Parliament and of the Council of 11 March 1996 on the legal
+     protection of databases, and under any national implementation
+     thereof, including any amended or successor version of such
+     directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+     world based on applicable law or treaty, and any national
+     implementations thereof.
+2. Waiver. To the greatest extent permitted by, but not in contravention
+of, applicable law, Affirmer hereby overtly, fully, permanently,
+irrevocably and unconditionally waives, abandons, and surrenders all of
+Affirmer's Copyright and Related Rights and associated claims and causes
+of action, whether now known or unknown (including existing as well as
+future claims and causes of action), in the Work (i) in all territories
+worldwide, (ii) for the maximum duration provided by applicable law or
+treaty (including future time extensions), (iii) in any current or future
+medium and for any number of copies, and (iv) for any purpose whatsoever,
+including without limitation commercial, advertising or promotional
+purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+member of the public at large and to the detriment of Affirmer's heirs and
+successors, fully intending that such Waiver shall not be subject to
+revocation, rescission, cancellation, termination, or any other legal or
+equitable action to disrupt the quiet enjoyment of the Work by the public
+as contemplated by Affirmer's express Statement of Purpose.
+3. Public License Fallback. Should any part of the Waiver for any reason
+be judged legally invalid or ineffective under applicable law, then the
+Waiver shall be preserved to the maximum extent permitted taking into
+account Affirmer's express Statement of Purpose. In addition, to the
+extent the Waiver is so judged Affirmer hereby grants to each affected
+person a royalty-free, non transferable, non sublicensable, non exclusive,
+irrevocable and unconditional license to exercise Affirmer's Copyright and
+Related Rights in the Work (i) in all territories worldwide, (ii) for the
+maximum duration provided by applicable law or treaty (including future
+time extensions), (iii) in any current or future medium and for any number
+of copies, and (iv) for any purpose whatsoever, including without
+limitation commercial, advertising or promotional purposes (the
+"License"). The License shall be deemed effective as of the date CC0 was
+applied by Affirmer to the Work. Should any part of the License for any
+reason be judged legally invalid or ineffective under applicable law, such
+partial invalidity or ineffectiveness shall not invalidate the remainder
+of the License, and in such case Affirmer hereby affirms that he or she
+will not (i) exercise any of his or her remaining Copyright and Related
+Rights in the Work or (ii) assert any associated claims and causes of
+action with respect to the Work, in either case contrary to Affirmer's
+express Statement of Purpose.
+4. Limitations and Disclaimers.
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+    surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+    warranties of any kind concerning the Work, express, implied,
+    statutory or otherwise, including without limitation warranties of
+    title, merchantability, fitness for a particular purpose, non
+    infringement, or the absence of latent or other defects, accuracy, or
+    the presence or absence of errors, whether or not discoverable, all to
+    the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+    that may apply to the Work or any use thereof, including without
+    limitation any person's Copyright and Related Rights in the Work.
+    Further, Affirmer disclaims responsibility for obtaining any necessary
+    consents, permissions or other rights required for any use of the
+    Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+    party to this document and has no duty or obligation with respect to
+    this CC0 or use of the Work.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-Swin2SR ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [2021] [SwinIR Authors]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-SwinIR ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [2021] [SwinIR Authors]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LICENSE-lama ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [2021] Samsung Research
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/LaMa.py ADDED Viewed

	@@ -0,0 +1,694 @@

+# pylint: skip-file
+"""
+Model adapted from advimman's lama project: https://github.com/advimman/lama
+"""
+# Fast Fourier Convolution NeurIPS 2020
+# original implementation https://github.com/pkumivision/FFC/blob/main/model_zoo/ffc.py
+# paper https://proceedings.neurips.cc/paper/2020/file/2fd5d41ec6cfab47e32164d5624269b1-Paper.pdf
+from typing import List
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision.transforms.functional import InterpolationMode, rotate
+class LearnableSpatialTransformWrapper(nn.Module):
+    """Run a wrapped module on a reflect-padded, rotated copy of its input.
+
+    The input is padded on every side by ``pad_coef`` times its height/width,
+    rotated by ``self.angle``, passed through ``impl``, rotated back by
+    ``-self.angle`` and cropped by the same padding amounts.
+
+    Args:
+        impl: Callable (typically an ``nn.Module``) applied to the
+            transformed input.
+        pad_coef: Fraction of the height/width added as reflect padding on
+            each side before rotating.
+        angle_init_range: Upper bound of the initial angle, which is drawn as
+            ``torch.rand(1) * angle_init_range``.
+        train_angle: When true the angle is stored as an ``nn.Parameter``;
+            otherwise it stays a plain tensor attribute.
+    """
+    def __init__(self, impl, pad_coef=0.5, angle_init_range=80, train_angle=True):
+        super().__init__()
+        self.impl = impl
+        initial_angle = torch.rand(1) * angle_init_range
+        # Only a trainable angle is registered with the module; a fixed angle
+        # is kept as an ordinary tensor and moved to the input's device/dtype
+        # at call time (see the ``.to(...)`` calls below).
+        self.angle = (
+            nn.Parameter(initial_angle, requires_grad=True)
+            if train_angle
+            else initial_angle
+        )
+        self.pad_coef = pad_coef
+    def forward(self, x):
+        """Apply ``impl`` in the rotated frame to a tensor or a tuple of tensors.
+
+        Raises:
+            ValueError: If ``x`` is neither a tensor nor a tuple.
+        """
+        if torch.is_tensor(x):
+            transformed = self.transform(x)
+            return self.inverse_transform(self.impl(transformed), x)
+        if not isinstance(x, tuple):
+            raise ValueError(f"Unexpected input type {type(x)}")
+        # Tuple input: transform every element, call impl once on the whole
+        # tuple, then undo the transform element-wise against the originals.
+        outputs = self.impl(tuple(self.transform(item) for item in x))
+        return tuple(
+            self.inverse_transform(out, source) for out, source in zip(outputs, x)
+        )
+    def transform(self, x):
+        """Reflect-pad ``x`` and rotate it by ``self.angle``."""
+        # Unpacking shape[2:] into two names requires exactly two trailing dims.
+        rows, cols = x.shape[2:]
+        margin_h = int(rows * self.pad_coef)
+        margin_w = int(cols * self.pad_coef)
+        padded = F.pad(x, [margin_w, margin_w, margin_h, margin_h], mode="reflect")
+        return rotate(padded, self.angle.to(padded), InterpolationMode.BILINEAR, fill=0)
+    def inverse_transform(self, y_padded_rotated, orig_x):
+        """Rotate back by ``-self.angle`` and crop the padding derived from ``orig_x``."""
+        rows, cols = orig_x.shape[2:]
+        margin_h = int(rows * self.pad_coef)
+        margin_w = int(cols * self.pad_coef)
+        unrotated = rotate(
+            y_padded_rotated,
+            -self.angle.to(y_padded_rotated),
+            InterpolationMode.BILINEAR,
+            fill=0,
+        )
+        full_h, full_w = unrotated.shape[2:]
+        return unrotated[
+            :, :, margin_h : full_h - margin_h, margin_w : full_w - margin_w
+        ]
+class SELayer(nn.Module):
+    """Squeeze-and-excitation style channel gating.
+
+    Each channel is averaged over its spatial extent, the resulting vector is
+    passed through a bias-free Linear -> ReLU -> Linear -> Sigmoid stack, and
+    the input is multiplied channel-wise by the resulting gates.
+
+    Args:
+        channel: Number of input (and output) channels.
+        reduction: Divisor for the hidden width, ``channel // reduction``.
+    """
+    def __init__(self, channel, reduction=16):
+        super().__init__()
+        hidden = channel // reduction
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.fc = nn.Sequential(
+            nn.Linear(channel, hidden, bias=False),
+            nn.ReLU(inplace=True),
+            nn.Linear(hidden, channel, bias=False),
+            nn.Sigmoid(),
+        )
+    def forward(self, x):
+        """Return ``x`` scaled by per-sample, per-channel gates."""
+        # Four-way unpacking means the input must be 4-dimensional.
+        batch, channels, _height, _width = x.size()
+        pooled = self.avg_pool(x).view(batch, channels)
+        gates = self.fc(pooled).view(batch, channels, 1, 1)
+        return x * gates.expand_as(x)
+class FourierUnit(nn.Module):
+    """Pointwise convolution applied to the real FFT of the input.
+
+    The input is transformed with ``torch.fft.rfftn``, its real and imaginary
+    parts are folded into the channel axis, a 1x1 convolution + BatchNorm +
+    ReLU is applied, and the result is mapped back with ``torch.fft.irfftn``.
+
+    Args:
+        in_channels: Channels of the input tensor.
+        out_channels: Channels of the output tensor.
+        groups: ``groups`` argument of the 1x1 convolution.
+        spatial_scale_factor: If not ``None``, the input is resized by this
+            factor before the FFT and the output is resized back afterwards.
+        spatial_scale_mode: Interpolation mode used for both resizes.
+        spectral_pos_encoding: If true, two coordinate channels (vertical and
+            horizontal ramps from 0 to 1) are prepended to the spectrum.
+        use_se: If true, an ``SELayer`` is applied before the convolution.
+        se_kwargs: Extra keyword arguments for ``SELayer`` (``None`` -> none).
+        ffc3d: If true, the FFT runs over the last three dims instead of two.
+        fft_norm: ``norm`` argument forwarded to the FFT calls.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        groups=1,
+        spatial_scale_factor=None,
+        spatial_scale_mode="bilinear",
+        spectral_pos_encoding=False,
+        use_se=False,
+        se_kwargs=None,
+        ffc3d=False,
+        fft_norm="ortho",
+    ):
+        super().__init__()
+        self.groups = groups
+        # Real and imaginary parts each contribute in_channels; the optional
+        # positional encoding adds two more input channels.
+        coord_channels = 2 if spectral_pos_encoding else 0
+        self.conv_layer = nn.Conv2d(
+            in_channels * 2 + coord_channels,
+            out_channels * 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=groups,
+            bias=False,
+        )
+        self.bn = nn.BatchNorm2d(out_channels * 2)
+        self.relu = nn.ReLU(inplace=True)
+        # Optional squeeze-and-excitation block on the stacked spectrum.
+        self.use_se = use_se
+        if use_se:
+            extra = {} if se_kwargs is None else se_kwargs
+            self.se = SELayer(self.conv_layer.in_channels, **extra)
+        self.spatial_scale_factor = spatial_scale_factor
+        self.spatial_scale_mode = spatial_scale_mode
+        self.spectral_pos_encoding = spectral_pos_encoding
+        self.ffc3d = ffc3d
+        self.fft_norm = fft_norm
+    def forward(self, x):
+        """Filter ``x`` in the frequency domain and return the spatial result.
+
+        CUDA half-precision inputs are upcast to float for the FFTs, cast to
+        half for the convolution, and the output is cast back to half.
+        """
+        # Only CUDA fp16 tensors take the mixed-precision path.
+        cuda_half = x.type() == "torch.cuda.HalfTensor"
+        batch = x.shape[0]
+        rescale = self.spatial_scale_factor is not None
+        if rescale:
+            orig_size = x.shape[-2:]
+            x = F.interpolate(
+                x,
+                scale_factor=self.spatial_scale_factor,
+                mode=self.spatial_scale_mode,
+                align_corners=False,
+            )
+        fft_dim = (-2, -1)
+        if self.ffc3d:
+            fft_dim = (-3, -2, -1)
+        fft_input = x.float() if cuda_half else x
+        spectrum = torch.fft.rfftn(fft_input, dim=fft_dim, norm=self.fft_norm)
+        # (batch, c, h, w/2+1, 2): real and imaginary parts on a trailing axis.
+        feats = torch.stack((spectrum.real, spectrum.imag), dim=-1)
+        feats = feats.permute(0, 1, 4, 2, 3).contiguous()  # (batch, c, 2, h, w/2+1)
+        # Merge the channel and real/imag axes into a single channel axis.
+        feats = feats.view(batch, -1, *feats.size()[3:])
+        if self.spectral_pos_encoding:
+            height, width = feats.shape[-2:]
+            vert_ramp = torch.linspace(0, 1, height)[None, None, :, None]
+            hor_ramp = torch.linspace(0, 1, width)[None, None, None, :]
+            coords_vert = vert_ramp.expand(batch, 1, height, width).to(feats)
+            coords_hor = hor_ramp.expand(batch, 1, height, width).to(feats)
+            feats = torch.cat((coords_vert, coords_hor, feats), dim=1)
+        if self.use_se:
+            feats = self.se(feats)
+        conv_input = feats.half() if cuda_half else feats
+        feats = self.conv_layer(conv_input)  # (batch, c*2, h, w/2+1)
+        feats = self.relu(self.bn(feats))
+        # The inverse FFT below always runs in float.
+        feats = feats.float()
+        # Split the channel axis back into (c, 2) and move the real/imag pair
+        # to the end so it can be recombined into a complex tensor.
+        feats = feats.view(batch, -1, 2, *feats.size()[2:])
+        feats = feats.permute(0, 1, 3, 4, 2).contiguous()
+        spectrum = torch.complex(feats[..., 0], feats[..., 1])
+        target_shape = x.shape[-3:] if self.ffc3d else x.shape[-2:]
+        output = torch.fft.irfftn(
+            spectrum, s=target_shape, dim=fft_dim, norm=self.fft_norm
+        )
+        if cuda_half:
+            output = output.half()
+        if rescale:
+            output = F.interpolate(
+                output,
+                size=orig_size,
+                mode=self.spatial_scale_mode,
+                align_corners=False,
+            )
+        return output
+class SpectralTransform(nn.Module):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        stride=1,
+        groups=1,
+        enable_lfu=True,
+        separable_fu=False,
+        **fu_kwargs,
+    ):
+        # bn_layer not used
+        super(SpectralTransform, self).__init__()
+        self.enable_lfu = enable_lfu
+        if stride == 2:
+            self.downsample = nn.AvgPool2d(kernel_size=(2, 2), stride=2)
+        else:
+            self.downsample = nn.Identity()
+        self.stride = stride
+        self.conv1 = nn.Sequential(
+            nn.Conv2d(
+                in_channels, out_channels // 2, kernel_size=1, groups=groups, bias=False
+            ),
+            nn.BatchNorm2d(out_channels // 2),
+            nn.ReLU(inplace=True),
+        )
+        fu_class = FourierUnit
+        self.fu = fu_class(out_channels // 2, out_channels // 2, groups, **fu_kwargs)
+        if self.enable_lfu:
+            self.lfu = fu_class(out_channels // 2, out_channels // 2, groups)
+        self.conv2 = torch.nn.Conv2d(
+            out_channels // 2, out_channels, kernel_size=1, groups=groups, bias=False
+        )
+    def forward(self, x):
+        x = self.downsample(x)
+        x = self.conv1(x)
+        output = self.fu(x)
+        if self.enable_lfu:
+            _, c, h, _ = x.shape
+            split_no = 2
+            split_s = h // split_no
+            xs = torch.cat(
+                torch.split(x[:, : c // 4], split_s, dim=-2), dim=1
+            ).contiguous()
+            xs = torch.cat(torch.split(xs, split_s, dim=-1), dim=1).contiguous()
+            xs = self.lfu(xs)
+            xs = xs.repeat(1, 1, split_no, split_no).contiguous()
+        else:
+            xs = 0
+        output = self.conv2(x + output + xs)
+        return output
+class FFC(nn.Module):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        ratio_gin,
+        ratio_gout,
+        stride=1,
+        padding=0,
+        dilation=1,
+        groups=1,
+        bias=False,
+        enable_lfu=True,
+        padding_type="reflect",
+        gated=False,
+        **spectral_kwargs,
+    ):
+        super(FFC, self).__init__()
+        assert stride == 1 or stride == 2, "Stride should be 1 or 2."
+        self.stride = stride
+        in_cg = int(in_channels * ratio_gin)
+        in_cl = in_channels - in_cg
+        out_cg = int(out_channels * ratio_gout)
+        out_cl = out_channels - out_cg
+        # groups_g = 1 if groups == 1 else int(groups * ratio_gout)
+        # groups_l = 1 if groups == 1 else groups - groups_g
+        self.ratio_gin = ratio_gin
+        self.ratio_gout = ratio_gout
+        self.global_in_num = in_cg
+        module = nn.Identity if in_cl == 0 or out_cl == 0 else nn.Conv2d
+        self.convl2l = module(
+            in_cl,
+            out_cl,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            padding_mode=padding_type,
+        )
+        module = nn.Identity if in_cl == 0 or out_cg == 0 else nn.Conv2d
+        self.convl2g = module(
+            in_cl,
+            out_cg,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            padding_mode=padding_type,
+        )
+        module = nn.Identity if in_cg == 0 or out_cl == 0 else nn.Conv2d
+        self.convg2l = module(
+            in_cg,
+            out_cl,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            padding_mode=padding_type,
+        )
+        module = nn.Identity if in_cg == 0 or out_cg == 0 else SpectralTransform
+        self.convg2g = module(
+            in_cg,
+            out_cg,
+            stride,
+            1 if groups == 1 else groups // 2,
+            enable_lfu,
+            **spectral_kwargs,
+        )
+        self.gated = gated
+        module = (
+            nn.Identity if in_cg == 0 or out_cl == 0 or not self.gated else nn.Conv2d
+        )
+        self.gate = module(in_channels, 2, 1)
+    def forward(self, x):
+        x_l, x_g = x if type(x) is tuple else (x, 0)
+        out_xl, out_xg = 0, 0
+        if self.gated:
+            total_input_parts = [x_l]
+            if torch.is_tensor(x_g):
+                total_input_parts.append(x_g)
+            total_input = torch.cat(total_input_parts, dim=1)
+            gates = torch.sigmoid(self.gate(total_input))
+            g2l_gate, l2g_gate = gates.chunk(2, dim=1)
+        else:
+            g2l_gate, l2g_gate = 1, 1
+        if self.ratio_gout != 1:
+            out_xl = self.convl2l(x_l) + self.convg2l(x_g) * g2l_gate
+        if self.ratio_gout != 0:
+            out_xg = self.convl2g(x_l) * l2g_gate + self.convg2g(x_g)
+        return out_xl, out_xg
+class FFC_BN_ACT(nn.Module):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        ratio_gin,
+        ratio_gout,
+        stride=1,
+        padding=0,
+        dilation=1,
+        groups=1,
+        bias=False,
+        norm_layer=nn.BatchNorm2d,
+        activation_layer=nn.Identity,
+        padding_type="reflect",
+        enable_lfu=True,
+        **kwargs,
+    ):
+        super(FFC_BN_ACT, self).__init__()
+        self.ffc = FFC(
+            in_channels,
+            out_channels,
+            kernel_size,
+            ratio_gin,
+            ratio_gout,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            enable_lfu,
+            padding_type=padding_type,
+            **kwargs,
+        )
+        lnorm = nn.Identity if ratio_gout == 1 else norm_layer
+        gnorm = nn.Identity if ratio_gout == 0 else norm_layer
+        global_channels = int(out_channels * ratio_gout)
+        self.bn_l = lnorm(out_channels - global_channels)
+        self.bn_g = gnorm(global_channels)
+        lact = nn.Identity if ratio_gout == 1 else activation_layer
+        gact = nn.Identity if ratio_gout == 0 else activation_layer
+        self.act_l = lact(inplace=True)
+        self.act_g = gact(inplace=True)
+    def forward(self, x):
+        x_l, x_g = self.ffc(x)
+        x_l = self.act_l(self.bn_l(x_l))
+        x_g = self.act_g(self.bn_g(x_g))
+        return x_l, x_g
+class FFCResnetBlock(nn.Module):
+    def __init__(
+        self,
+        dim,
+        padding_type,
+        norm_layer,
+        activation_layer=nn.ReLU,
+        dilation=1,
+        spatial_transform_kwargs=None,
+        inline=False,
+        **conv_kwargs,
+    ):
+        super().__init__()
+        self.conv1 = FFC_BN_ACT(
+            dim,
+            dim,
+            kernel_size=3,
+            padding=dilation,
+            dilation=dilation,
+            norm_layer=norm_layer,
+            activation_layer=activation_layer,
+            padding_type=padding_type,
+            **conv_kwargs,
+        )
+        self.conv2 = FFC_BN_ACT(
+            dim,
+            dim,
+            kernel_size=3,
+            padding=dilation,
+            dilation=dilation,
+            norm_layer=norm_layer,
+            activation_layer=activation_layer,
+            padding_type=padding_type,
+            **conv_kwargs,
+        )
+        if spatial_transform_kwargs is not None:
+            self.conv1 = LearnableSpatialTransformWrapper(
+                self.conv1, **spatial_transform_kwargs
+            )
+            self.conv2 = LearnableSpatialTransformWrapper(
+                self.conv2, **spatial_transform_kwargs
+            )
+        self.inline = inline
+    def forward(self, x):
+        if self.inline:
+            x_l, x_g = (
+                x[:, : -self.conv1.ffc.global_in_num],
+                x[:, -self.conv1.ffc.global_in_num :],
+            )
+        else:
+            x_l, x_g = x if type(x) is tuple else (x, 0)
+        id_l, id_g = x_l, x_g
+        x_l, x_g = self.conv1((x_l, x_g))
+        x_l, x_g = self.conv2((x_l, x_g))
+        x_l, x_g = id_l + x_l, id_g + x_g
+        out = x_l, x_g
+        if self.inline:
+            out = torch.cat(out, dim=1)
+        return out
+class ConcatTupleLayer(nn.Module):
+    def forward(self, x):
+        assert isinstance(x, tuple)
+        x_l, x_g = x
+        assert torch.is_tensor(x_l) or torch.is_tensor(x_g)
+        if not torch.is_tensor(x_g):
+            return x_l
+        return torch.cat(x, dim=1)
+class FFCResNetGenerator(nn.Module):
+    """Encoder / FFC-ResNet bottleneck / decoder generator ending in a Sigmoid.
+    The network is assembled as one ``nn.Sequential`` stored in ``self.model``:
+    reflection pad + 7x7 FFC_BN_ACT, ``n_downsampling`` stride-2 FFC_BN_ACT
+    layers, ``n_blocks`` FFCResnetBlock layers, a ConcatTupleLayer,
+    ``n_downsampling`` transposed-conv upsampling stages, an optional inline
+    FFCResnetBlock, reflection pad + 7x7 conv and a final Sigmoid.
+    NOTE: the ``init_conv_kwargs``, ``downsample_conv_kwargs``,
+    ``resnet_conv_kwargs``, ``spatial_transform_kwargs`` and ``out_ffc_kwargs``
+    arguments are overwritten with hard-coded values inside ``__init__``, so
+    values passed by the caller for them have no effect.
+    """
+    def __init__(
+        self,
+        input_nc,
+        output_nc,
+        ngf=64,
+        n_downsampling=3,
+        n_blocks=18,
+        norm_layer=nn.BatchNorm2d,
+        padding_type="reflect",
+        activation_layer=nn.ReLU,
+        up_norm_layer=nn.BatchNorm2d,
+        up_activation=nn.ReLU(True),
+        init_conv_kwargs={},
+        downsample_conv_kwargs={},
+        resnet_conv_kwargs={},
+        spatial_transform_layers=None,
+        spatial_transform_kwargs={},
+        max_features=1024,
+        out_ffc=False,
+        out_ffc_kwargs={},
+    ):
+        assert n_blocks >= 0
+        super().__init__()
+        # The two bare string literals below are not docstrings; they are
+        # no-op expression statements recording a reference configuration
+        # and a debug print of the constructor arguments.
+        """
+        init_conv_kwargs = {'ratio_gin': 0, 'ratio_gout': 0, 'enable_lfu': False}
+        downsample_conv_kwargs = {'ratio_gin': '${generator.init_conv_kwargs.ratio_gout}', 'ratio_gout': '${generator.downsample_conv_kwargs.ratio_gin}', 'enable_lfu': False}
+        resnet_conv_kwargs = {'ratio_gin': 0.75, 'ratio_gout': '${generator.resnet_conv_kwargs.ratio_gin}', 'enable_lfu': False}
+        spatial_transform_kwargs = {}
+        out_ffc_kwargs = {}
+        """
+        """
+        print(input_nc, output_nc, ngf, n_downsampling, n_blocks, norm_layer,
+                padding_type, activation_layer,
+                up_norm_layer, up_activation,
+                spatial_transform_layers,
+                add_out_act, max_features, out_ffc, file=sys.stderr)
+        4 3 64 3 18 <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
+        reflect <class 'torch.nn.modules.activation.ReLU'>
+        <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
+        ReLU(inplace=True)
+        None sigmoid 1024 False
+        """
+        # Hard-coded configuration: these assignments replace whatever the
+        # caller passed for the same-named parameters.
+        init_conv_kwargs = {"ratio_gin": 0, "ratio_gout": 0, "enable_lfu": False}
+        downsample_conv_kwargs = {"ratio_gin": 0, "ratio_gout": 0, "enable_lfu": False}
+        resnet_conv_kwargs = {
+            "ratio_gin": 0.75,
+            "ratio_gout": 0.75,
+            "enable_lfu": False,
+        }
+        spatial_transform_kwargs = {}
+        out_ffc_kwargs = {}
+        # Stem: explicit reflection padding of 3 followed by an unpadded 7x7 FFC.
+        model = [
+            nn.ReflectionPad2d(3),
+            FFC_BN_ACT(
+                input_nc,
+                ngf,
+                kernel_size=7,
+                padding=0,
+                norm_layer=norm_layer,
+                activation_layer=activation_layer,
+                **init_conv_kwargs,
+            ),
+        ]
+        ### downsample
+        for i in range(n_downsampling):
+            mult = 2**i
+            if i == n_downsampling - 1:
+                # The last downsampling layer must emit the local/global split
+                # that the ResNet blocks expect as input.
+                cur_conv_kwargs = dict(downsample_conv_kwargs)
+                cur_conv_kwargs["ratio_gout"] = resnet_conv_kwargs.get("ratio_gin", 0)
+            else:
+                cur_conv_kwargs = downsample_conv_kwargs
+            model += [
+                FFC_BN_ACT(
+                    min(max_features, ngf * mult),
+                    min(max_features, ngf * mult * 2),
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
+                    norm_layer=norm_layer,
+                    activation_layer=activation_layer,
+                    **cur_conv_kwargs,
+                )
+            ]
+        mult = 2**n_downsampling
+        feats_num_bottleneck = min(max_features, ngf * mult)
+        ### resnet blocks
+        for i in range(n_blocks):
+            cur_resblock = FFCResnetBlock(
+                feats_num_bottleneck,
+                padding_type=padding_type,
+                activation_layer=activation_layer,
+                norm_layer=norm_layer,
+                **resnet_conv_kwargs,
+            )
+            if spatial_transform_layers is not None and i in spatial_transform_layers:
+                cur_resblock = LearnableSpatialTransformWrapper(
+                    cur_resblock, **spatial_transform_kwargs
+                )
+            model += [cur_resblock]
+        # Merge the (local, global) tuple into one tensor for the decoder.
+        model += [ConcatTupleLayer()]
+        ### upsample
+        for i in range(n_downsampling):
+            mult = 2 ** (n_downsampling - i)
+            # The same ``up_activation`` module instance is appended at every
+            # upsampling stage.
+            model += [
+                nn.ConvTranspose2d(
+                    min(max_features, ngf * mult),
+                    min(max_features, int(ngf * mult / 2)),
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
+                    output_padding=1,
+                ),
+                up_norm_layer(min(max_features, int(ngf * mult / 2))),
+                up_activation,
+            ]
+        # NOTE(review): out_ffc_kwargs was reset to {} above, so this path
+        # forwards no ratio_gin / ratio_gout to FFC_BN_ACT, which declares
+        # them as required parameters - confirm whether out_ffc=True is usable.
+        if out_ffc:
+            model += [
+                FFCResnetBlock(
+                    ngf,
+                    padding_type=padding_type,
+                    activation_layer=activation_layer,
+                    norm_layer=norm_layer,
+                    inline=True,
+                    **out_ffc_kwargs,
+                )
+            ]
+        model += [
+            nn.ReflectionPad2d(3),
+            nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
+        ]
+        model.append(nn.Sigmoid())
+        self.model = nn.Sequential(*model)
+    def forward(self, image, mask):
+        # Image and mask are concatenated along the channel axis (dim 1)
+        # before entering the sequential model.
+        return self.model(torch.cat([image, mask], dim=1))
+class LaMa(nn.Module):
+    def __init__(self, state_dict) -> None:
+        super(LaMa, self).__init__()
+        self.model_arch = "LaMa"
+        self.sub_type = "Inpaint"
+        self.in_nc = 4
+        self.out_nc = 3
+        self.scale = 1
+        self.min_size = None
+        self.pad_mod = 8
+        self.pad_to_square = False
+        self.model = FFCResNetGenerator(self.in_nc, self.out_nc)
+        self.state = {
+            k.replace("generator.model", "model.model"): v
+            for k, v in state_dict.items()
+        }
+        self.supports_fp16 = False
+        self.support_bf16 = True
+        self.load_state_dict(self.state, strict=False)
+    def forward(self, img, mask):
+        masked_img = img * (1 - mask)
+        inpainted_mask = mask * self.model.forward(masked_img, mask)
+        result = inpainted_mask + (1 - mask) * img
+        return result

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/ChannelAttention.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import math
+import torch.nn as nn
+class CA_layer(nn.Module):
+    def __init__(self, channel, reduction=16):
+        super(CA_layer, self).__init__()
+        # global average pooling
+        self.gap = nn.AdaptiveAvgPool2d(1)
+        self.fc = nn.Sequential(
+            nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False),
+            nn.GELU(),
+            nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False),
+            # nn.Sigmoid()
+        )
+    def forward(self, x):
+        y = self.fc(self.gap(x))
+        return x * y.expand_as(x)
+class Simple_CA_layer(nn.Module):
+    def __init__(self, channel):
+        super(Simple_CA_layer, self).__init__()
+        self.gap = nn.AdaptiveAvgPool2d(1)
+        self.fc = nn.Conv2d(
+            in_channels=channel,
+            out_channels=channel,
+            kernel_size=1,
+            padding=0,
+            stride=1,
+            groups=1,
+            bias=True,
+        )
+    def forward(self, x):
+        return x * self.fc(self.gap(x))
+class ECA_layer(nn.Module):
+    """Constructs a ECA module.
+    Args:
+        channel: Number of channels of the input feature map
+        k_size: Adaptive selection of kernel size
+    """
+    def __init__(self, channel):
+        super(ECA_layer, self).__init__()
+        b = 1
+        gamma = 2
+        k_size = int(abs(math.log(channel, 2) + b) / gamma)
+        k_size = k_size if k_size % 2 else k_size + 1
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.conv = nn.Conv1d(
+            1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False
+        )
+        # self.sigmoid = nn.Sigmoid()
+    def forward(self, x):
+        # x: input features with shape [b, c, h, w]
+        # b, c, h, w = x.size()
+        # feature descriptor on the global spatial information
+        y = self.avg_pool(x)
+        # Two different branches of ECA module
+        y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
+        # Multi-scale information fusion
+        # y = self.sigmoid(y)
+        return x * y.expand_as(x)
+class ECA_MaxPool_layer(nn.Module):
+    """Constructs a ECA module.
+    Args:
+        channel: Number of channels of the input feature map
+        k_size: Adaptive selection of kernel size
+    """
+    def __init__(self, channel):
+        super(ECA_MaxPool_layer, self).__init__()
+        b = 1
+        gamma = 2
+        k_size = int(abs(math.log(channel, 2) + b) / gamma)
+        k_size = k_size if k_size % 2 else k_size + 1
+        self.max_pool = nn.AdaptiveMaxPool2d(1)
+        self.conv = nn.Conv1d(
+            1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False
+        )
+        # self.sigmoid = nn.Sigmoid()
+    def forward(self, x):
+        # x: input features with shape [b, c, h, w]
+        # b, c, h, w = x.size()
+        # feature descriptor on the global spatial information
+        y = self.max_pool(x)
+        # Two different branches of ECA module
+        y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
+        # Multi-scale information fusion
+        # y = self.sigmoid(y)
+        return x * y.expand_as(x)

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/OSA.py ADDED Viewed

	@@ -0,0 +1,577 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################
+# File: OSA.py
+# Created Date: Tuesday April 28th 2022
+# Author: Chen Xuanhong
+# Email: chenxuanhongzju@outlook.com
+# Last Modified:  Sunday, 23rd April 2023 3:07:42 pm
+# Modified By: Chen Xuanhong
+# Copyright (c) 2020 Shanghai Jiao Tong University
+#############################################################
+import torch
+import torch.nn.functional as F
+from einops import rearrange, repeat
+from einops.layers.torch import Rearrange, Reduce
+from torch import einsum, nn
+from .layernorm import LayerNorm2d
+# helpers
+def exists(val):
+    return val is not None
+def default(val, d):
+    return val if exists(val) else d
+def cast_tuple(val, length=1):
+    return val if isinstance(val, tuple) else ((val,) * length)
+# helper classes
+class PreNormResidual(nn.Module):
+    def __init__(self, dim, fn):
+        super().__init__()
+        self.norm = nn.LayerNorm(dim)
+        self.fn = fn
+    def forward(self, x):
+        return self.fn(self.norm(x)) + x
+class Conv_PreNormResidual(nn.Module):
+    def __init__(self, dim, fn):
+        super().__init__()
+        self.norm = LayerNorm2d(dim)
+        self.fn = fn
+    def forward(self, x):
+        return self.fn(self.norm(x)) + x
+class FeedForward(nn.Module):
+    def __init__(self, dim, mult=2, dropout=0.0):
+        super().__init__()
+        inner_dim = int(dim * mult)
+        self.net = nn.Sequential(
+            nn.Linear(dim, inner_dim),
+            nn.GELU(),
+            nn.Dropout(dropout),
+            nn.Linear(inner_dim, dim),
+            nn.Dropout(dropout),
+        )
+    def forward(self, x):
+        return self.net(x)
+class Conv_FeedForward(nn.Module):
+    def __init__(self, dim, mult=2, dropout=0.0):
+        super().__init__()
+        inner_dim = int(dim * mult)
+        self.net = nn.Sequential(
+            nn.Conv2d(dim, inner_dim, 1, 1, 0),
+            nn.GELU(),
+            nn.Dropout(dropout),
+            nn.Conv2d(inner_dim, dim, 1, 1, 0),
+            nn.Dropout(dropout),
+        )
+    def forward(self, x):
+        return self.net(x)
+class Gated_Conv_FeedForward(nn.Module):
+    def __init__(self, dim, mult=1, bias=False, dropout=0.0):
+        super().__init__()
+        hidden_features = int(dim * mult)
+        self.project_in = nn.Conv2d(dim, hidden_features * 2, kernel_size=1, bias=bias)
+        self.dwconv = nn.Conv2d(
+            hidden_features * 2,
+            hidden_features * 2,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=hidden_features * 2,
+            bias=bias,
+        )
+        self.project_out = nn.Conv2d(hidden_features, dim, kernel_size=1, bias=bias)
+    def forward(self, x):
+        x = self.project_in(x)
+        x1, x2 = self.dwconv(x).chunk(2, dim=1)
+        x = F.gelu(x1) * x2
+        x = self.project_out(x)
+        return x
+# MBConv
+class SqueezeExcitation(nn.Module):
+    def __init__(self, dim, shrinkage_rate=0.25):
+        super().__init__()
+        hidden_dim = int(dim * shrinkage_rate)
+        self.gate = nn.Sequential(
+            Reduce("b c h w -> b c", "mean"),
+            nn.Linear(dim, hidden_dim, bias=False),
+            nn.SiLU(),
+            nn.Linear(hidden_dim, dim, bias=False),
+            nn.Sigmoid(),
+            Rearrange("b c -> b c 1 1"),
+        )
+    def forward(self, x):
+        return x * self.gate(x)
+class MBConvResidual(nn.Module):
+    def __init__(self, fn, dropout=0.0):
+        super().__init__()
+        self.fn = fn
+        self.dropsample = Dropsample(dropout)
+    def forward(self, x):
+        out = self.fn(x)
+        out = self.dropsample(out)
+        return out + x
+class Dropsample(nn.Module):
+    def __init__(self, prob=0):
+        super().__init__()
+        self.prob = prob
+    def forward(self, x):
+        device = x.device
+        if self.prob == 0.0 or (not self.training):
+            return x
+        keep_mask = (
+            torch.FloatTensor((x.shape[0], 1, 1, 1), device=device).uniform_()
+            > self.prob
+        )
+        return x * keep_mask / (1 - self.prob)
+def MBConv(
+    dim_in, dim_out, *, downsample, expansion_rate=4, shrinkage_rate=0.25, dropout=0.0
+):
+    hidden_dim = int(expansion_rate * dim_out)
+    stride = 2 if downsample else 1
+    net = nn.Sequential(
+        nn.Conv2d(dim_in, hidden_dim, 1),
+        # nn.BatchNorm2d(hidden_dim),
+        nn.GELU(),
+        nn.Conv2d(
+            hidden_dim, hidden_dim, 3, stride=stride, padding=1, groups=hidden_dim
+        ),
+        # nn.BatchNorm2d(hidden_dim),
+        nn.GELU(),
+        SqueezeExcitation(hidden_dim, shrinkage_rate=shrinkage_rate),
+        nn.Conv2d(hidden_dim, dim_out, 1),
+        # nn.BatchNorm2d(dim_out)
+    )
+    if dim_in == dim_out and not downsample:
+        net = MBConvResidual(net, dropout=dropout)
+    return net
+# attention related classes
+class Attention(nn.Module):
+    def __init__(
+        self,
+        dim,
+        dim_head=32,
+        dropout=0.0,
+        window_size=7,
+        with_pe=True,
+    ):
+        super().__init__()
+        assert (
+            dim % dim_head
+        ) == 0, "dimension should be divisible by dimension per head"
+        self.heads = dim // dim_head
+        self.scale = dim_head**-0.5
+        self.with_pe = with_pe
+        self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
+        self.attend = nn.Sequential(nn.Softmax(dim=-1), nn.Dropout(dropout))
+        self.to_out = nn.Sequential(
+            nn.Linear(dim, dim, bias=False), nn.Dropout(dropout)
+        )
+        # relative positional bias
+        if self.with_pe:
+            self.rel_pos_bias = nn.Embedding((2 * window_size - 1) ** 2, self.heads)
+            pos = torch.arange(window_size)
+            grid = torch.stack(torch.meshgrid(pos, pos))
+            grid = rearrange(grid, "c i j -> (i j) c")
+            rel_pos = rearrange(grid, "i ... -> i 1 ...") - rearrange(
+                grid, "j ... -> 1 j ..."
+            )
+            rel_pos += window_size - 1
+            rel_pos_indices = (rel_pos * torch.tensor([2 * window_size - 1, 1])).sum(
+                dim=-1
+            )
+            self.register_buffer("rel_pos_indices", rel_pos_indices, persistent=False)
+    def forward(self, x):
+        batch, height, width, window_height, window_width, _, device, h = (
+            *x.shape,
+            x.device,
+            self.heads,
+        )
+        # flatten
+        x = rearrange(x, "b x y w1 w2 d -> (b x y) (w1 w2) d")
+        # project for queries, keys, values
+        q, k, v = self.to_qkv(x).chunk(3, dim=-1)
+        # split heads
+        q, k, v = map(lambda t: rearrange(t, "b n (h d ) -> b h n d", h=h), (q, k, v))
+        # scale
+        q = q * self.scale
+        # sim
+        sim = einsum("b h i d, b h j d -> b h i j", q, k)
+        # add positional bias
+        if self.with_pe:
+            bias = self.rel_pos_bias(self.rel_pos_indices)
+            sim = sim + rearrange(bias, "i j h -> h i j")
+        # attention
+        attn = self.attend(sim)
+        # aggregate
+        out = einsum("b h i j, b h j d -> b h i d", attn, v)
+        # merge heads
+        out = rearrange(
+            out, "b h (w1 w2) d -> b w1 w2 (h d)", w1=window_height, w2=window_width
+        )
+        # combine heads out
+        out = self.to_out(out)
+        return rearrange(out, "(b x y) ... -> b x y ...", x=height, y=width)
+class Block_Attention(nn.Module):
+    def __init__(
+        self,
+        dim,
+        dim_head=32,
+        bias=False,
+        dropout=0.0,
+        window_size=7,
+        with_pe=True,
+    ):
+        super().__init__()
+        assert (
+            dim % dim_head
+        ) == 0, "dimension should be divisible by dimension per head"
+        self.heads = dim // dim_head
+        self.ps = window_size
+        self.scale = dim_head**-0.5
+        self.with_pe = with_pe
+        self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias)
+        self.qkv_dwconv = nn.Conv2d(
+            dim * 3,
+            dim * 3,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=dim * 3,
+            bias=bias,
+        )
+        self.attend = nn.Sequential(nn.Softmax(dim=-1), nn.Dropout(dropout))
+        self.to_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias)
+    def forward(self, x):
+        # project for queries, keys, values
+        b, c, h, w = x.shape
+        qkv = self.qkv_dwconv(self.qkv(x))
+        q, k, v = qkv.chunk(3, dim=1)
+        # split heads
+        q, k, v = map(
+            lambda t: rearrange(
+                t,
+                "b (h d) (x w1) (y w2) -> (b x y) h (w1 w2) d",
+                h=self.heads,
+                w1=self.ps,
+                w2=self.ps,
+            ),
+            (q, k, v),
+        )
+        # scale
+        q = q * self.scale
+        # sim
+        sim = einsum("b h i d, b h j d -> b h i j", q, k)
+        # attention
+        attn = self.attend(sim)
+        # aggregate
+        out = einsum("b h i j, b h j d -> b h i d", attn, v)
+        # merge heads
+        out = rearrange(
+            out,
+            "(b x y) head (w1 w2) d -> b (head d) (x w1) (y w2)",
+            x=h // self.ps,
+            y=w // self.ps,
+            head=self.heads,
+            w1=self.ps,
+            w2=self.ps,
+        )
+        out = self.to_out(out)
+        return out
+class Channel_Attention(nn.Module):
+    def __init__(self, dim, heads, bias=False, dropout=0.0, window_size=7):
+        super(Channel_Attention, self).__init__()
+        self.heads = heads
+        self.temperature = nn.Parameter(torch.ones(heads, 1, 1))
+        self.ps = window_size
+        self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias)
+        self.qkv_dwconv = nn.Conv2d(
+            dim * 3,
+            dim * 3,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=dim * 3,
+            bias=bias,
+        )
+        self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias)
+    def forward(self, x):
+        b, c, h, w = x.shape
+        qkv = self.qkv_dwconv(self.qkv(x))
+        qkv = qkv.chunk(3, dim=1)
+        q, k, v = map(
+            lambda t: rearrange(
+                t,
+                "b (head d) (h ph) (w pw) -> b (h w) head d (ph pw)",
+                ph=self.ps,
+                pw=self.ps,
+                head=self.heads,
+            ),
+            qkv,
+        )
+        q = F.normalize(q, dim=-1)
+        k = F.normalize(k, dim=-1)
+        attn = (q @ k.transpose(-2, -1)) * self.temperature
+        attn = attn.softmax(dim=-1)
+        out = attn @ v
+        out = rearrange(
+            out,
+            "b (h w) head d (ph pw) -> b (head d) (h ph) (w pw)",
+            h=h // self.ps,
+            w=w // self.ps,
+            ph=self.ps,
+            pw=self.ps,
+            head=self.heads,
+        )
+        out = self.project_out(out)
+        return out
+class Channel_Attention_grid(nn.Module):
+    def __init__(self, dim, heads, bias=False, dropout=0.0, window_size=7):
+        super(Channel_Attention_grid, self).__init__()
+        self.heads = heads
+        self.temperature = nn.Parameter(torch.ones(heads, 1, 1))
+        self.ps = window_size
+        self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias)
+        self.qkv_dwconv = nn.Conv2d(
+            dim * 3,
+            dim * 3,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=dim * 3,
+            bias=bias,
+        )
+        self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias)
+    def forward(self, x):
+        b, c, h, w = x.shape
+        qkv = self.qkv_dwconv(self.qkv(x))
+        qkv = qkv.chunk(3, dim=1)
+        q, k, v = map(
+            lambda t: rearrange(
+                t,
+                "b (head d) (h ph) (w pw) -> b (ph pw) head d (h w)",
+                ph=self.ps,
+                pw=self.ps,
+                head=self.heads,
+            ),
+            qkv,
+        )
+        q = F.normalize(q, dim=-1)
+        k = F.normalize(k, dim=-1)
+        attn = (q @ k.transpose(-2, -1)) * self.temperature
+        attn = attn.softmax(dim=-1)
+        out = attn @ v
+        out = rearrange(
+            out,
+            "b (ph pw) head d (h w) -> b (head d) (h ph) (w pw)",
+            h=h // self.ps,
+            w=w // self.ps,
+            ph=self.ps,
+            pw=self.ps,
+            head=self.heads,
+        )
+        out = self.project_out(out)
+        return out
+class OSA_Block(nn.Module):
+    """One block stacking MBConv, window attention, channel attention and gated FFNs.
+    The sub-modules live in a single `nn.Sequential` named `layer`; their
+    positions in that sequence are part of the state-dict key names (e.g.
+    `layer.2.fn` is the first window `Attention`), so the order must not change.
+    Args:
+        channel_num: channel width used by every sub-module.
+        bias: accepted for interface compatibility; not used in this block.
+        ffn_bias: accepted for interface compatibility; not used in this block.
+        window_size: window side length for the attention rearrangements; the
+            input height and width must be divisible by it for `Rearrange`.
+        with_pe: forwarded to `Attention` to enable relative positional bias.
+        dropout: forwarded to the attention and feed-forward sub-modules.
+    """
+    def __init__(
+        self,
+        channel_num=64,
+        bias=True,
+        ffn_bias=True,
+        window_size=8,
+        with_pe=False,
+        dropout=0.0,
+    ):
+        super(OSA_Block, self).__init__()
+        w = window_size
+        self.layer = nn.Sequential(
+            # layer.0: residual MBConv (same in/out width, no downsampling)
+            MBConv(
+                channel_num,
+                channel_num,
+                downsample=False,
+                expansion_rate=1,
+                shrinkage_rate=0.25,
+            ),
+            # layer.1: cut the map into contiguous w x w windows
+            Rearrange(
+                "b d (x w1) (y w2) -> b x y w1 w2 d", w1=w, w2=w
+            ),  # block-like attention
+            # layer.2: attention among the pixels of each contiguous window
+            PreNormResidual(
+                channel_num,
+                Attention(
+                    dim=channel_num,
+                    dim_head=channel_num // 4,
+                    dropout=dropout,
+                    window_size=window_size,
+                    with_pe=with_pe,
+                ),
+            ),
+            # layer.3: undo the window split
+            Rearrange("b x y w1 w2 d -> b d (x w1) (y w2)"),
+            Conv_PreNormResidual(
+                channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout)
+            ),
+            # channel-like attention
+            Conv_PreNormResidual(
+                channel_num,
+                Channel_Attention(
+                    dim=channel_num, heads=4, dropout=dropout, window_size=window_size
+                ),
+            ),
+            Conv_PreNormResidual(
+                channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout)
+            ),
+            # Here w1/w2 are the outer factors, so each group of w x w tokens is
+            # sampled with a stride across the whole map instead of contiguously.
+            Rearrange(
+                "b d (w1 x) (w2 y) -> b x y w1 w2 d", w1=w, w2=w
+            ),  # grid-like attention
+            PreNormResidual(
+                channel_num,
+                Attention(
+                    dim=channel_num,
+                    dim_head=channel_num // 4,
+                    dropout=dropout,
+                    window_size=window_size,
+                    with_pe=with_pe,
+                ),
+            ),
+            # undo the grid split
+            Rearrange("b x y w1 w2 d -> b d (w1 x) (w2 y)"),
+            Conv_PreNormResidual(
+                channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout)
+            ),
+            # channel-like attention
+            Conv_PreNormResidual(
+                channel_num,
+                Channel_Attention_grid(
+                    dim=channel_num, heads=4, dropout=dropout, window_size=window_size
+                ),
+            ),
+            Conv_PreNormResidual(
+                channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout)
+            ),
+        )
+    def forward(self, x):
+        # Runs the whole sequence; input and output are both (b, channel_num, h, w).
+        out = self.layer(x)
+        return out

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/OSAG.py ADDED Viewed

	@@ -0,0 +1,60 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################
+# File: OSAG.py
+# Created Date: Tuesday April 28th 2022
+# Author: Chen Xuanhong
+# Email: chenxuanhongzju@outlook.com
+# Last Modified:  Sunday, 23rd April 2023 3:08:49 pm
+# Modified By: Chen Xuanhong
+# Copyright (c) 2020 Shanghai Jiao Tong University
+#############################################################
+import torch.nn as nn
+from .esa import ESA
+from .OSA import OSA_Block
+class OSAG(nn.Module):
+    def __init__(
+        self,
+        channel_num=64,
+        bias=True,
+        block_num=4,
+        ffn_bias=False,
+        window_size=0,
+        pe=False,
+    ):
+        super(OSAG, self).__init__()
+        # print("window_size: %d" % (window_size))
+        # print("with_pe", pe)
+        # print("ffn_bias: %d" % (ffn_bias))
+        # block_script_name = kwargs.get("block_script_name", "OSA")
+        # block_class_name = kwargs.get("block_class_name", "OSA_Block")
+        # script_name = "." + block_script_name
+        # package = __import__(script_name, fromlist=True)
+        block_class = OSA_Block  # getattr(package, block_class_name)
+        group_list = []
+        for _ in range(block_num):
+            temp_res = block_class(
+                channel_num,
+                bias,
+                ffn_bias=ffn_bias,
+                window_size=window_size,
+                with_pe=pe,
+            )
+            group_list.append(temp_res)
+        group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias))
+        self.residual_layer = nn.Sequential(*group_list)
+        esa_channel = max(channel_num // 4, 16)
+        self.esa = ESA(esa_channel, channel_num)
+    def forward(self, x):
+        out = self.residual_layer(x)
+        out = out + x
+        return self.esa(out)

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/OmniSR.py ADDED Viewed

	@@ -0,0 +1,143 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################
+# File: OmniSR.py
+# Created Date: Tuesday April 28th 2022
+# Author: Chen Xuanhong
+# Email: chenxuanhongzju@outlook.com
+# Last Modified:  Sunday, 23rd April 2023 3:06:36 pm
+# Modified By: Chen Xuanhong
+# Copyright (c) 2020 Shanghai Jiao Tong University
+#############################################################
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .OSAG import OSAG
+from .pixelshuffle import pixelshuffle_block
+class OmniSR(nn.Module):
+    """OmniSR super-resolution network whose hyper-parameters are inferred from a checkpoint.
+    The constructor reads tensor shapes and key names from `state_dict` to
+    recover the feature width, input channels, upscale factor, number of OSAG
+    groups and attention window size, builds the matching modules, and then
+    loads the weights with `strict=False`.
+    Args:
+        state_dict: checkpoint mapping; must contain "input.weight" and
+            "up.0.weight".
+        **kwargs: accepted but not used.
+    """
+    def __init__(
+        self,
+        state_dict,
+        **kwargs,
+    ):
+        super(OmniSR, self).__init__()
+        self.state = state_dict
+        bias = True  # Fine to assume this for now
+        block_num = 1  # Fine to assume this for now
+        ffn_bias = True
+        pe = True
+        # input.weight is the first conv's weight: (out_channels, in_channels, kh, kw).
+        num_feat = state_dict["input.weight"].shape[0] or 64
+        num_in_ch = state_dict["input.weight"].shape[1] or 3
+        num_out_ch = num_in_ch  # we can just assume this for now. pixelshuffle smh
+        # The pixel-shuffle conv emits num_out_ch * scale**2 channels, so the
+        # scale is the square root of the ratio.
+        pixelshuffle_shape = state_dict["up.0.weight"].shape[0]
+        up_scale = math.sqrt(pixelshuffle_shape / num_out_ch)
+        if up_scale - int(up_scale) > 0:
+            print(
+                "out_nc is probably different than in_nc, scale calculation might be wrong"
+            )
+        up_scale = int(up_scale)
+        # Count OSAG groups: the second dotted component of every key that
+        # mentions "residual_layer" is read as the group index.
+        res_num = 0
+        for key in state_dict.keys():
+            if "residual_layer" in key:
+                temp_res_num = int(key.split(".")[1])
+                if temp_res_num > res_num:
+                    res_num = temp_res_num
+        res_num = res_num + 1  # zero-indexed
+        residual_layer = []
+        self.res_num = res_num
+        # The relative-position table has (2 * window - 1) ** 2 rows, so the
+        # window size can be recovered from its row count when it is present.
+        if (
+            "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight"
+            in state_dict.keys()
+        ):
+            rel_pos_bias_weight = state_dict[
+                "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight"
+            ].shape[0]
+            self.window_size = int((math.sqrt(rel_pos_bias_weight) + 1) / 2)
+        else:
+            # NOTE(review): pe stays True on this path, so the positional-bias
+            # tables are created but have no checkpoint weights; strict=False
+            # below hides that. Confirm this is intended.
+            self.window_size = 8
+        self.up_scale = up_scale
+        for _ in range(res_num):
+            temp_res = OSAG(
+                channel_num=num_feat,
+                bias=bias,
+                block_num=block_num,
+                ffn_bias=ffn_bias,
+                window_size=self.window_size,
+                pe=pe,
+            )
+            residual_layer.append(temp_res)
+        self.residual_layer = nn.Sequential(*residual_layer)
+        self.input = nn.Conv2d(
+            in_channels=num_in_ch,
+            out_channels=num_feat,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            bias=bias,
+        )
+        self.output = nn.Conv2d(
+            in_channels=num_feat,
+            out_channels=num_feat,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            bias=bias,
+        )
+        self.up = pixelshuffle_block(num_feat, num_out_ch, up_scale, bias=bias)
+        # self.tail   = pixelshuffle_block(num_feat,num_out_ch,up_scale,bias=bias)
+        # for m in self.modules():
+        #     if isinstance(m, nn.Conv2d):
+        #         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+        #         m.weight.data.normal_(0, sqrt(2. / n))
+        # chaiNNer specific stuff
+        self.model_arch = "OmniSR"
+        self.sub_type = "SR"
+        self.in_nc = num_in_ch
+        self.out_nc = num_out_ch
+        self.num_feat = num_feat
+        self.scale = up_scale
+        self.supports_fp16 = True  # TODO: Test this
+        self.supports_bfp16 = True
+        self.min_size_restriction = 16
+        # strict=False: missing or unexpected keys do not raise.
+        self.load_state_dict(state_dict, strict=False)
+    def check_image_size(self, x):
+        """Zero-pad `x` on the right and bottom so H and W are multiples of the window size."""
+        _, _, h, w = x.size()
+        # import pdb; pdb.set_trace()
+        # The outer modulo makes the pad 0 (not window_size) when already aligned.
+        mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
+        mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
+        # x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect')
+        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "constant", 0)
+        return x
+    def forward(self, x):
+        """Upscale `x` by `self.up_scale`, cropping away the padding added for window alignment."""
+        # Remember the unpadded size so the output can be cropped to match it.
+        H, W = x.shape[2:]
+        x = self.check_image_size(x)
+        residual = self.input(x)
+        out = self.residual_layer(residual)
+        # origin
+        # Global skip connection around all OSAG groups.
+        out = torch.add(self.output(out), residual)
+        out = self.up(out)
+        out = out[:, :, : H * self.up_scale, : W * self.up_scale]
+        return out

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/esa.py ADDED Viewed

	@@ -0,0 +1,294 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################
+# File: esa.py
+# Created Date: Tuesday April 28th 2022
+# Author: Chen Xuanhong
+# Email: chenxuanhongzju@outlook.com
+# Last Modified:  Thursday, 20th April 2023 9:28:06 am
+# Modified By: Chen Xuanhong
+# Copyright (c) 2020 Shanghai Jiao Tong University
+#############################################################
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .layernorm import LayerNorm2d
+def moment(x, dim=(2, 3), k=2):
+    assert len(x.size()) == 4
+    mean = torch.mean(x, dim=dim).unsqueeze(-1).unsqueeze(-1)
+    mk = (1 / (x.size(2) * x.size(3))) * torch.sum(torch.pow(x - mean, k), dim=dim)
+    return mk
+class ESA(nn.Module):
+    """
+    Modification of Enhanced Spatial Attention (ESA), which is proposed by
+    `Residual Feature Aggregation Network for Image Super-Resolution`
+    Note: `conv_max` and `conv3_` are NOT used here, so the corresponding codes
+    are deleted.
+    """
+    def __init__(self, esa_channels, n_feats, conv=nn.Conv2d):
+        super(ESA, self).__init__()
+        f = esa_channels
+        self.conv1 = conv(n_feats, f, kernel_size=1)
+        self.conv_f = conv(f, f, kernel_size=1)
+        self.conv2 = conv(f, f, kernel_size=3, stride=2, padding=0)
+        self.conv3 = conv(f, f, kernel_size=3, padding=1)
+        self.conv4 = conv(f, n_feats, kernel_size=1)
+        self.sigmoid = nn.Sigmoid()
+        self.relu = nn.ReLU(inplace=True)
+    def forward(self, x):
+        c1_ = self.conv1(x)
+        c1 = self.conv2(c1_)
+        v_max = F.max_pool2d(c1, kernel_size=7, stride=3)
+        c3 = self.conv3(v_max)
+        c3 = F.interpolate(
+            c3, (x.size(2), x.size(3)), mode="bilinear", align_corners=False
+        )
+        cf = self.conv_f(c1_)
+        c4 = self.conv4(c3 + cf)
+        m = self.sigmoid(c4)
+        return x * m
+class LK_ESA(nn.Module):
+    def __init__(
+        self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True
+    ):
+        super(LK_ESA, self).__init__()
+        f = esa_channels
+        self.conv1 = conv(n_feats, f, kernel_size=1)
+        self.conv_f = conv(f, f, kernel_size=1)
+        kernel_size = 17
+        kernel_expand = kernel_expand
+        padding = kernel_size // 2
+        self.vec_conv = nn.Conv2d(
+            in_channels=f * kernel_expand,
+            out_channels=f * kernel_expand,
+            kernel_size=(1, kernel_size),
+            padding=(0, padding),
+            groups=2,
+            bias=bias,
+        )
+        self.vec_conv3x1 = nn.Conv2d(
+            in_channels=f * kernel_expand,
+            out_channels=f * kernel_expand,
+            kernel_size=(1, 3),
+            padding=(0, 1),
+            groups=2,
+            bias=bias,
+        )
+        self.hor_conv = nn.Conv2d(
+            in_channels=f * kernel_expand,
+            out_channels=f * kernel_expand,
+            kernel_size=(kernel_size, 1),
+            padding=(padding, 0),
+            groups=2,
+            bias=bias,
+        )
+        self.hor_conv1x3 = nn.Conv2d(
+            in_channels=f * kernel_expand,
+            out_channels=f * kernel_expand,
+            kernel_size=(3, 1),
+            padding=(1, 0),
+            groups=2,
+            bias=bias,
+        )
+        self.conv4 = conv(f, n_feats, kernel_size=1)
+        self.sigmoid = nn.Sigmoid()
+        self.relu = nn.ReLU(inplace=True)
+    def forward(self, x):
+        c1_ = self.conv1(x)
+        res = self.vec_conv(c1_) + self.vec_conv3x1(c1_)
+        res = self.hor_conv(res) + self.hor_conv1x3(res)
+        cf = self.conv_f(c1_)
+        c4 = self.conv4(res + cf)
+        m = self.sigmoid(c4)
+        return x * m
+class LK_ESA_LN(nn.Module):
+    def __init__(
+        self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True
+    ):
+        super(LK_ESA_LN, self).__init__()
+        f = esa_channels
+        self.conv1 = conv(n_feats, f, kernel_size=1)
+        self.conv_f = conv(f, f, kernel_size=1)
+        kernel_size = 17
+        kernel_expand = kernel_expand
+        padding = kernel_size // 2
+        self.norm = LayerNorm2d(n_feats)
+        self.vec_conv = nn.Conv2d(
+            in_channels=f * kernel_expand,
+            out_channels=f * kernel_expand,
+            kernel_size=(1, kernel_size),
+            padding=(0, padding),
+            groups=2,
+            bias=bias,
+        )
+        self.vec_conv3x1 = nn.Conv2d(
+            in_channels=f * kernel_expand,
+            out_channels=f * kernel_expand,
+            kernel_size=(1, 3),
+            padding=(0, 1),
+            groups=2,
+            bias=bias,
+        )
+        self.hor_conv = nn.Conv2d(
+            in_channels=f * kernel_expand,
+            out_channels=f * kernel_expand,
+            kernel_size=(kernel_size, 1),
+            padding=(padding, 0),
+            groups=2,
+            bias=bias,
+        )
+        self.hor_conv1x3 = nn.Conv2d(
+            in_channels=f * kernel_expand,
+            out_channels=f * kernel_expand,
+            kernel_size=(3, 1),
+            padding=(1, 0),
+            groups=2,
+            bias=bias,
+        )
+        self.conv4 = conv(f, n_feats, kernel_size=1)
+        self.sigmoid = nn.Sigmoid()
+        self.relu = nn.ReLU(inplace=True)
+    def forward(self, x):
+        c1_ = self.norm(x)
+        c1_ = self.conv1(c1_)
+        res = self.vec_conv(c1_) + self.vec_conv3x1(c1_)
+        res = self.hor_conv(res) + self.hor_conv1x3(res)
+        cf = self.conv_f(c1_)
+        c4 = self.conv4(res + cf)
+        m = self.sigmoid(c4)
+        return x * m
+class AdaGuidedFilter(nn.Module):
+    def __init__(
+        self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True
+    ):
+        super(AdaGuidedFilter, self).__init__()
+        self.gap = nn.AdaptiveAvgPool2d(1)
+        self.fc = nn.Conv2d(
+            in_channels=n_feats,
+            out_channels=1,
+            kernel_size=1,
+            padding=0,
+            stride=1,
+            groups=1,
+            bias=True,
+        )
+        self.r = 5
+    def box_filter(self, x, r):
+        channel = x.shape[1]
+        kernel_size = 2 * r + 1
+        weight = 1.0 / (kernel_size**2)
+        box_kernel = weight * torch.ones(
+            (channel, 1, kernel_size, kernel_size), dtype=torch.float32, device=x.device
+        )
+        output = F.conv2d(x, weight=box_kernel, stride=1, padding=r, groups=channel)
+        return output
+    def forward(self, x):
+        _, _, H, W = x.shape
+        N = self.box_filter(
+            torch.ones((1, 1, H, W), dtype=x.dtype, device=x.device), self.r
+        )
+        # epsilon = self.fc(self.gap(x))
+        # epsilon = torch.pow(epsilon, 2)
+        epsilon = 1e-2
+        mean_x = self.box_filter(x, self.r) / N
+        var_x = self.box_filter(x * x, self.r) / N - mean_x * mean_x
+        A = var_x / (var_x + epsilon)
+        b = (1 - A) * mean_x
+        m = A * x + b
+        # mean_A = self.box_filter(A, self.r) / N
+        # mean_b = self.box_filter(b, self.r) / N
+        # m = mean_A * x + mean_b
+        return x * m
+class AdaConvGuidedFilter(nn.Module):
+    def __init__(
+        self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True
+    ):
+        super(AdaConvGuidedFilter, self).__init__()
+        f = esa_channels
+        self.conv_f = conv(f, f, kernel_size=1)
+        kernel_size = 17
+        kernel_expand = kernel_expand
+        padding = kernel_size // 2
+        self.vec_conv = nn.Conv2d(
+            in_channels=f,
+            out_channels=f,
+            kernel_size=(1, kernel_size),
+            padding=(0, padding),
+            groups=f,
+            bias=bias,
+        )
+        self.hor_conv = nn.Conv2d(
+            in_channels=f,
+            out_channels=f,
+            kernel_size=(kernel_size, 1),
+            padding=(padding, 0),
+            groups=f,
+            bias=bias,
+        )
+        self.gap = nn.AdaptiveAvgPool2d(1)
+        self.fc = nn.Conv2d(
+            in_channels=f,
+            out_channels=f,
+            kernel_size=1,
+            padding=0,
+            stride=1,
+            groups=1,
+            bias=True,
+        )
+    def forward(self, x):
+        y = self.vec_conv(x)
+        y = self.hor_conv(y)
+        sigma = torch.pow(y, 2)
+        epsilon = self.fc(self.gap(y))
+        weight = sigma / (sigma + epsilon)
+        m = weight * x + (1 - weight)
+        return x * m

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/layernorm.py ADDED Viewed

	@@ -0,0 +1,70 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################
+# File: layernorm.py
+# Created Date: Tuesday April 28th 2022
+# Author: Chen Xuanhong
+# Email: chenxuanhongzju@outlook.com
+# Last Modified:  Thursday, 20th April 2023 9:28:20 am
+# Modified By: Chen Xuanhong
+# Copyright (c) 2020 Shanghai Jiao Tong University
+#############################################################
+import torch
+import torch.nn as nn
+class LayerNormFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x, weight, bias, eps):
+        ctx.eps = eps
+        N, C, H, W = x.size()
+        mu = x.mean(1, keepdim=True)
+        var = (x - mu).pow(2).mean(1, keepdim=True)
+        y = (x - mu) / (var + eps).sqrt()
+        ctx.save_for_backward(y, var, weight)
+        y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1)
+        return y
+    @staticmethod
+    def backward(ctx, grad_output):
+        eps = ctx.eps
+        N, C, H, W = grad_output.size()
+        y, var, weight = ctx.saved_variables
+        g = grad_output * weight.view(1, C, 1, 1)
+        mean_g = g.mean(dim=1, keepdim=True)
+        mean_gy = (g * y).mean(dim=1, keepdim=True)
+        gx = 1.0 / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g)
+        return (
+            gx,
+            (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0),
+            grad_output.sum(dim=3).sum(dim=2).sum(dim=0),
+            None,
+        )
+class LayerNorm2d(nn.Module):
+    def __init__(self, channels, eps=1e-6):
+        super(LayerNorm2d, self).__init__()
+        self.register_parameter("weight", nn.Parameter(torch.ones(channels)))
+        self.register_parameter("bias", nn.Parameter(torch.zeros(channels)))
+        self.eps = eps
+    def forward(self, x):
+        return LayerNormFunction.apply(x, self.weight, self.bias, self.eps)
+class GRN(nn.Module):
+    """GRN (Global Response Normalization) layer"""
+    def __init__(self, dim):
+        super().__init__()
+        self.gamma = nn.Parameter(torch.zeros(1, dim, 1, 1))
+        self.beta = nn.Parameter(torch.zeros(1, dim, 1, 1))
+    def forward(self, x):
+        Gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True)
+        Nx = Gx / (Gx.mean(dim=1, keepdim=True) + 1e-6)
+        return self.gamma * (x * Nx) + self.beta + x

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/OmniSR/pixelshuffle.py ADDED Viewed

	@@ -0,0 +1,31 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################
+# File: pixelshuffle.py
+# Created Date: Friday July 1st 2022
+# Author: Chen Xuanhong
+# Email: chenxuanhongzju@outlook.com
+# Last Modified:  Friday, 1st July 2022 10:18:39 am
+# Modified By: Chen Xuanhong
+# Copyright (c) 2022 Shanghai Jiao Tong University
+#############################################################
+import torch.nn as nn
+def pixelshuffle_block(
+    in_channels, out_channels, upscale_factor=2, kernel_size=3, bias=False
+):
+    """
+    Upsample features according to `upscale_factor`.
+    """
+    padding = kernel_size // 2
+    conv = nn.Conv2d(
+        in_channels,
+        out_channels * (upscale_factor**2),
+        kernel_size,
+        padding=1,
+        bias=bias,
+    )
+    pixel_shuffle = nn.PixelShuffle(upscale_factor)
+    return nn.Sequential(*[conv, pixel_shuffle])

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/RRDB.py ADDED Viewed

	@@ -0,0 +1,296 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import functools
+import math
+import re
+from collections import OrderedDict
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from . import block as B
+# Borrowed from https://github.com/rlaphoenix/VSGAN/blob/master/vsgan/archs/ESRGAN.py
+# Which enhanced stuff that was already here
+class RRDBNet(nn.Module):
+    def __init__(
+        self,
+        state_dict,
+        norm=None,
+        act: str = "leakyrelu",
+        upsampler: str = "upconv",
+        mode: B.ConvMode = "CNA",
+    ) -> None:
+        """
+        ESRGAN - Enhanced Super-Resolution Generative Adversarial Networks.
+        By Xintao Wang, Ke Yu, Shixiang Wu, Jinjin Gu, Yihao Liu, Chao Dong, Yu Qiao,
+        and Chen Change Loy.
+        This is old-arch Residual in Residual Dense Block Network and is not
+        the newest revision that's available at github.com/xinntao/ESRGAN.
+        This is on purpose, the newest Network has severely limited the
+        potential use of the Network with no benefits.
+        This network supports model files from both new and old-arch.
+        Args:
+            norm: Normalization layer
+            act: Activation layer
+            upsampler: Upsample layer. upconv, pixel_shuffle
+            mode: Convolution mode
+        """
+        super(RRDBNet, self).__init__()
+        self.model_arch = "ESRGAN"
+        self.sub_type = "SR"
+        self.state = state_dict
+        self.norm = norm
+        self.act = act
+        self.upsampler = upsampler
+        self.mode = mode
+        self.state_map = {
+            # currently supports old, new, and newer RRDBNet arch models
+            # ESRGAN, BSRGAN/RealSR, Real-ESRGAN
+            "model.0.weight": ("conv_first.weight",),
+            "model.0.bias": ("conv_first.bias",),
+            "model.1.sub./NB/.weight": ("trunk_conv.weight", "conv_body.weight"),
+            "model.1.sub./NB/.bias": ("trunk_conv.bias", "conv_body.bias"),
+            r"model.1.sub.\1.RDB\2.conv\3.0.\4": (
+                r"RRDB_trunk\.(\d+)\.RDB(\d)\.conv(\d+)\.(weight|bias)",
+                r"body\.(\d+)\.rdb(\d)\.conv(\d+)\.(weight|bias)",
+            ),
+        }
+        if "params_ema" in self.state:
+            self.state = self.state["params_ema"]
+            # self.model_arch = "RealESRGAN"
+        self.num_blocks = self.get_num_blocks()
+        self.plus = any("conv1x1" in k for k in self.state.keys())
+        if self.plus:
+            self.model_arch = "ESRGAN+"
+        self.state = self.new_to_old_arch(self.state)
+        self.key_arr = list(self.state.keys())
+        self.in_nc: int = self.state[self.key_arr[0]].shape[1]
+        self.out_nc: int = self.state[self.key_arr[-1]].shape[0]
+        self.scale: int = self.get_scale()
+        self.num_filters: int = self.state[self.key_arr[0]].shape[0]
+        c2x2 = False
+        if self.state["model.0.weight"].shape[-2] == 2:
+            c2x2 = True
+            self.scale = round(math.sqrt(self.scale / 4))
+            self.model_arch = "ESRGAN-2c2"
+        self.supports_fp16 = True
+        self.supports_bfp16 = True
+        self.min_size_restriction = None
+        # Detect if pixelunshuffle was used (Real-ESRGAN)
+        if self.in_nc in (self.out_nc * 4, self.out_nc * 16) and self.out_nc in (
+            self.in_nc / 4,
+            self.in_nc / 16,
+        ):
+            self.shuffle_factor = int(math.sqrt(self.in_nc / self.out_nc))
+        else:
+            self.shuffle_factor = None
+        upsample_block = {
+            "upconv": B.upconv_block,
+            "pixel_shuffle": B.pixelshuffle_block,
+        }.get(self.upsampler)
+        if upsample_block is None:
+            raise NotImplementedError(f"Upsample mode [{self.upsampler}] is not found")
+        if self.scale == 3:
+            upsample_blocks = upsample_block(
+                in_nc=self.num_filters,
+                out_nc=self.num_filters,
+                upscale_factor=3,
+                act_type=self.act,
+                c2x2=c2x2,
+            )
+        else:
+            upsample_blocks = [
+                upsample_block(
+                    in_nc=self.num_filters,
+                    out_nc=self.num_filters,
+                    act_type=self.act,
+                    c2x2=c2x2,
+                )
+                for _ in range(int(math.log(self.scale, 2)))
+            ]
+        self.model = B.sequential(
+            # fea conv
+            B.conv_block(
+                in_nc=self.in_nc,
+                out_nc=self.num_filters,
+                kernel_size=3,
+                norm_type=None,
+                act_type=None,
+                c2x2=c2x2,
+            ),
+            B.ShortcutBlock(
+                B.sequential(
+                    # rrdb blocks
+                    *[
+                        B.RRDB(
+                            nf=self.num_filters,
+                            kernel_size=3,
+                            gc=32,
+                            stride=1,
+                            bias=True,
+                            pad_type="zero",
+                            norm_type=self.norm,
+                            act_type=self.act,
+                            mode="CNA",
+                            plus=self.plus,
+                            c2x2=c2x2,
+                        )
+                        for _ in range(self.num_blocks)
+                    ],
+                    # lr conv
+                    B.conv_block(
+                        in_nc=self.num_filters,
+                        out_nc=self.num_filters,
+                        kernel_size=3,
+                        norm_type=self.norm,
+                        act_type=None,
+                        mode=self.mode,
+                        c2x2=c2x2,
+                    ),
+                )
+            ),
+            *upsample_blocks,
+            # hr_conv0
+            B.conv_block(
+                in_nc=self.num_filters,
+                out_nc=self.num_filters,
+                kernel_size=3,
+                norm_type=None,
+                act_type=self.act,
+                c2x2=c2x2,
+            ),
+            # hr_conv1
+            B.conv_block(
+                in_nc=self.num_filters,
+                out_nc=self.out_nc,
+                kernel_size=3,
+                norm_type=None,
+                act_type=None,
+                c2x2=c2x2,
+            ),
+        )
+        # Adjust these properties for calculations outside of the model
+        if self.shuffle_factor:
+            self.in_nc //= self.shuffle_factor**2
+            self.scale //= self.shuffle_factor
+        self.load_state_dict(self.state, strict=False)
+    def new_to_old_arch(self, state):
+        """Convert a new-arch model state dictionary to an old-arch dictionary."""
+        if "params_ema" in state:
+            state = state["params_ema"]
+        if "conv_first.weight" not in state:
+            # model is already old arch, this is a loose check, but should be sufficient
+            return state
+        # add nb to state keys
+        for kind in ("weight", "bias"):
+            self.state_map[f"model.1.sub.{self.num_blocks}.{kind}"] = self.state_map[
+                f"model.1.sub./NB/.{kind}"
+            ]
+            del self.state_map[f"model.1.sub./NB/.{kind}"]
+        old_state = OrderedDict()
+        for old_key, new_keys in self.state_map.items():
+            for new_key in new_keys:
+                if r"\1" in old_key:
+                    for k, v in state.items():
+                        sub = re.sub(new_key, old_key, k)
+                        if sub != k:
+                            old_state[sub] = v
+                else:
+                    if new_key in state:
+                        old_state[old_key] = state[new_key]
+        # upconv layers
+        max_upconv = 0
+        for key in state.keys():
+            match = re.match(r"(upconv|conv_up)(\d)\.(weight|bias)", key)
+            if match is not None:
+                _, key_num, key_type = match.groups()
+                old_state[f"model.{int(key_num) * 3}.{key_type}"] = state[key]
+                max_upconv = max(max_upconv, int(key_num) * 3)
+        # final layers
+        for key in state.keys():
+            if key in ("HRconv.weight", "conv_hr.weight"):
+                old_state[f"model.{max_upconv + 2}.weight"] = state[key]
+            elif key in ("HRconv.bias", "conv_hr.bias"):
+                old_state[f"model.{max_upconv + 2}.bias"] = state[key]
+            elif key in ("conv_last.weight",):
+                old_state[f"model.{max_upconv + 4}.weight"] = state[key]
+            elif key in ("conv_last.bias",):
+                old_state[f"model.{max_upconv + 4}.bias"] = state[key]
+        # Sort by first numeric value of each layer
+        def compare(item1, item2):
+            parts1 = item1.split(".")
+            parts2 = item2.split(".")
+            int1 = int(parts1[1])
+            int2 = int(parts2[1])
+            return int1 - int2
+        sorted_keys = sorted(old_state.keys(), key=functools.cmp_to_key(compare))
+        # Rebuild the output dict in the right order
+        out_dict = OrderedDict((k, old_state[k]) for k in sorted_keys)
+        return out_dict
+    def get_scale(self, min_part: int = 6) -> int:
+        n = 0
+        for part in list(self.state):
+            parts = part.split(".")[1:]
+            if len(parts) == 2:
+                part_num = int(parts[0])
+                if part_num > min_part and parts[1] == "weight":
+                    n += 1
+        return 2**n
+    def get_num_blocks(self) -> int:
+        nbs = []
+        state_keys = self.state_map[r"model.1.sub.\1.RDB\2.conv\3.0.\4"] + (
+            r"model\.\d+\.sub\.(\d+)\.RDB(\d+)\.conv(\d+)\.0\.(weight|bias)",
+        )
+        for state_key in state_keys:
+            for k in self.state:
+                m = re.search(state_key, k)
+                if m:
+                    nbs.append(int(m.group(1)))
+            if nbs:
+                break
+        return max(*nbs) + 1
+    def forward(self, x):
+        if self.shuffle_factor:
+            _, _, h, w = x.size()
+            mod_pad_h = (
+                self.shuffle_factor - h % self.shuffle_factor
+            ) % self.shuffle_factor
+            mod_pad_w = (
+                self.shuffle_factor - w % self.shuffle_factor
+            ) % self.shuffle_factor
+            x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect")
+            x = torch.pixel_unshuffle(x, downscale_factor=self.shuffle_factor)
+            x = self.model(x)
+            return x[:, :, : h * self.scale, : w * self.scale]
+        return self.model(x)

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SCUNet.py ADDED Viewed

	@@ -0,0 +1,455 @@

+# pylint: skip-file
+# -----------------------------------------------------------------------------------
+# SCUNet: Practical Blind Denoising via Swin-Conv-UNet and Data Synthesis, https://arxiv.org/abs/2203.13278
+# Zhang, Kai and Li, Yawei and Liang, Jingyun and Cao, Jiezhang and Zhang, Yulun and Tang, Hao and Timofte, Radu and Van Gool, Luc
+# -----------------------------------------------------------------------------------
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from einops.layers.torch import Rearrange
+from .timm.drop import DropPath
+from .timm.weight_init import trunc_normal_
+# Borrowed from https://github.com/cszn/SCUNet/blob/main/models/network_scunet.py
+class WMSA(nn.Module):
+    """Self-attention module in Swin Transformer"""
+    def __init__(self, input_dim, output_dim, head_dim, window_size, type):
+        super(WMSA, self).__init__()
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.head_dim = head_dim
+        self.scale = self.head_dim**-0.5
+        self.n_heads = input_dim // head_dim
+        self.window_size = window_size
+        self.type = type
+        self.embedding_layer = nn.Linear(self.input_dim, 3 * self.input_dim, bias=True)
+        self.relative_position_params = nn.Parameter(
+            torch.zeros((2 * window_size - 1) * (2 * window_size - 1), self.n_heads)
+        )
+        # TODO recover
+        # self.relative_position_params = nn.Parameter(torch.zeros(self.n_heads, 2 * window_size - 1, 2 * window_size -1))
+        self.relative_position_params = nn.Parameter(
+            torch.zeros((2 * window_size - 1) * (2 * window_size - 1), self.n_heads)
+        )
+        self.linear = nn.Linear(self.input_dim, self.output_dim)
+        trunc_normal_(self.relative_position_params, std=0.02)
+        self.relative_position_params = torch.nn.Parameter(
+            self.relative_position_params.view(
+                2 * window_size - 1, 2 * window_size - 1, self.n_heads
+            )
+            .transpose(1, 2)
+            .transpose(0, 1)
+        )
+    def generate_mask(self, h, w, p, shift):
+        """generating the mask of SW-MSA
+        Args:
+            shift: shift parameters in CyclicShift.
+        Returns:
+            attn_mask: should be (1 1 w p p),
+        """
+        # supporting square.
+        attn_mask = torch.zeros(
+            h,
+            w,
+            p,
+            p,
+            p,
+            p,
+            dtype=torch.bool,
+            device=self.relative_position_params.device,
+        )
+        if self.type == "W":
+            return attn_mask
+        s = p - shift
+        attn_mask[-1, :, :s, :, s:, :] = True
+        attn_mask[-1, :, s:, :, :s, :] = True
+        attn_mask[:, -1, :, :s, :, s:] = True
+        attn_mask[:, -1, :, s:, :, :s] = True
+        attn_mask = rearrange(
+            attn_mask, "w1 w2 p1 p2 p3 p4 -> 1 1 (w1 w2) (p1 p2) (p3 p4)"
+        )
+        return attn_mask
+    def forward(self, x):
+        """Forward pass of Window Multi-head Self-attention module.
+        Args:
+            x: input tensor with shape of [b h w c];
+            attn_mask: attention mask, fill -inf where the value is True;
+        Returns:
+            output: tensor shape [b h w c]
+        """
+        if self.type != "W":
+            x = torch.roll(
+                x,
+                shifts=(-(self.window_size // 2), -(self.window_size // 2)),
+                dims=(1, 2),
+            )
+        x = rearrange(
+            x,
+            "b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c",
+            p1=self.window_size,
+            p2=self.window_size,
+        )
+        h_windows = x.size(1)
+        w_windows = x.size(2)
+        # square validation
+        # assert h_windows == w_windows
+        x = rearrange(
+            x,
+            "b w1 w2 p1 p2 c -> b (w1 w2) (p1 p2) c",
+            p1=self.window_size,
+            p2=self.window_size,
+        )
+        qkv = self.embedding_layer(x)
+        q, k, v = rearrange(
+            qkv, "b nw np (threeh c) -> threeh b nw np c", c=self.head_dim
+        ).chunk(3, dim=0)
+        sim = torch.einsum("hbwpc,hbwqc->hbwpq", q, k) * self.scale
+        # Adding learnable relative embedding
+        sim = sim + rearrange(self.relative_embedding(), "h p q -> h 1 1 p q")
+        # Using Attn Mask to distinguish different subwindows.
+        if self.type != "W":
+            attn_mask = self.generate_mask(
+                h_windows, w_windows, self.window_size, shift=self.window_size // 2
+            )
+            sim = sim.masked_fill_(attn_mask, float("-inf"))
+        probs = nn.functional.softmax(sim, dim=-1)
+        output = torch.einsum("hbwij,hbwjc->hbwic", probs, v)
+        output = rearrange(output, "h b w p c -> b w p (h c)")
+        output = self.linear(output)
+        output = rearrange(
+            output,
+            "b (w1 w2) (p1 p2) c -> b (w1 p1) (w2 p2) c",
+            w1=h_windows,
+            p1=self.window_size,
+        )
+        if self.type != "W":
+            output = torch.roll(
+                output,
+                shifts=(self.window_size // 2, self.window_size // 2),
+                dims=(1, 2),
+            )
+        return output
+    def relative_embedding(self):
+        cord = torch.tensor(
+            np.array(
+                [
+                    [i, j]
+                    for i in range(self.window_size)
+                    for j in range(self.window_size)
+                ]
+            )
+        )
+        relation = cord[:, None, :] - cord[None, :, :] + self.window_size - 1
+        # negative is allowed
+        return self.relative_position_params[
+            :, relation[:, :, 0].long(), relation[:, :, 1].long()
+        ]
+class Block(nn.Module):
+    def __init__(
+        self,
+        input_dim,
+        output_dim,
+        head_dim,
+        window_size,
+        drop_path,
+        type="W",
+        input_resolution=None,
+    ):
+        """SwinTransformer Block"""
+        super(Block, self).__init__()
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        assert type in ["W", "SW"]
+        self.type = type
+        if input_resolution <= window_size:
+            self.type = "W"
+        self.ln1 = nn.LayerNorm(input_dim)
+        self.msa = WMSA(input_dim, input_dim, head_dim, window_size, self.type)
+        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+        self.ln2 = nn.LayerNorm(input_dim)
+        self.mlp = nn.Sequential(
+            nn.Linear(input_dim, 4 * input_dim),
+            nn.GELU(),
+            nn.Linear(4 * input_dim, output_dim),
+        )
+    def forward(self, x):
+        x = x + self.drop_path(self.msa(self.ln1(x)))
+        x = x + self.drop_path(self.mlp(self.ln2(x)))
+        return x
+class ConvTransBlock(nn.Module):
+    def __init__(
+        self,
+        conv_dim,
+        trans_dim,
+        head_dim,
+        window_size,
+        drop_path,
+        type="W",
+        input_resolution=None,
+    ):
+        """SwinTransformer and Conv Block"""
+        super(ConvTransBlock, self).__init__()
+        self.conv_dim = conv_dim
+        self.trans_dim = trans_dim
+        self.head_dim = head_dim
+        self.window_size = window_size
+        self.drop_path = drop_path
+        self.type = type
+        self.input_resolution = input_resolution
+        assert self.type in ["W", "SW"]
+        if self.input_resolution <= self.window_size:
+            self.type = "W"
+        self.trans_block = Block(
+            self.trans_dim,
+            self.trans_dim,
+            self.head_dim,
+            self.window_size,
+            self.drop_path,
+            self.type,
+            self.input_resolution,
+        )
+        self.conv1_1 = nn.Conv2d(
+            self.conv_dim + self.trans_dim,
+            self.conv_dim + self.trans_dim,
+            1,
+            1,
+            0,
+            bias=True,
+        )
+        self.conv1_2 = nn.Conv2d(
+            self.conv_dim + self.trans_dim,
+            self.conv_dim + self.trans_dim,
+            1,
+            1,
+            0,
+            bias=True,
+        )
+        self.conv_block = nn.Sequential(
+            nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False),
+            nn.ReLU(True),
+            nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False),
+        )
+    def forward(self, x):
+        conv_x, trans_x = torch.split(
+            self.conv1_1(x), (self.conv_dim, self.trans_dim), dim=1
+        )
+        conv_x = self.conv_block(conv_x) + conv_x
+        trans_x = Rearrange("b c h w -> b h w c")(trans_x)
+        trans_x = self.trans_block(trans_x)
+        trans_x = Rearrange("b h w c -> b c h w")(trans_x)
+        res = self.conv1_2(torch.cat((conv_x, trans_x), dim=1))
+        x = x + res
+        return x
+class SCUNet(nn.Module):
+    def __init__(
+        self,
+        state_dict,
+        in_nc=3,
+        config=[4, 4, 4, 4, 4, 4, 4],
+        dim=64,
+        drop_path_rate=0.0,
+        input_resolution=256,
+    ):
+        super(SCUNet, self).__init__()
+        self.model_arch = "SCUNet"
+        self.sub_type = "SR"
+        self.num_filters: int = 0
+        self.state = state_dict
+        self.config = config
+        self.dim = dim
+        self.head_dim = 32
+        self.window_size = 8
+        self.in_nc = in_nc
+        self.out_nc = self.in_nc
+        self.scale = 1
+        self.supports_fp16 = True
+        # drop path rate for each layer
+        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(config))]
+        self.m_head = [nn.Conv2d(in_nc, dim, 3, 1, 1, bias=False)]
+        begin = 0
+        self.m_down1 = [
+            ConvTransBlock(
+                dim // 2,
+                dim // 2,
+                self.head_dim,
+                self.window_size,
+                dpr[i + begin],
+                "W" if not i % 2 else "SW",
+                input_resolution,
+            )
+            for i in range(config[0])
+        ] + [nn.Conv2d(dim, 2 * dim, 2, 2, 0, bias=False)]
+        begin += config[0]
+        self.m_down2 = [
+            ConvTransBlock(
+                dim,
+                dim,
+                self.head_dim,
+                self.window_size,
+                dpr[i + begin],
+                "W" if not i % 2 else "SW",
+                input_resolution // 2,
+            )
+            for i in range(config[1])
+        ] + [nn.Conv2d(2 * dim, 4 * dim, 2, 2, 0, bias=False)]
+        begin += config[1]
+        self.m_down3 = [
+            ConvTransBlock(
+                2 * dim,
+                2 * dim,
+                self.head_dim,
+                self.window_size,
+                dpr[i + begin],
+                "W" if not i % 2 else "SW",
+                input_resolution // 4,
+            )
+            for i in range(config[2])
+        ] + [nn.Conv2d(4 * dim, 8 * dim, 2, 2, 0, bias=False)]
+        begin += config[2]
+        self.m_body = [
+            ConvTransBlock(
+                4 * dim,
+                4 * dim,
+                self.head_dim,
+                self.window_size,
+                dpr[i + begin],
+                "W" if not i % 2 else "SW",
+                input_resolution // 8,
+            )
+            for i in range(config[3])
+        ]
+        begin += config[3]
+        self.m_up3 = [
+            nn.ConvTranspose2d(8 * dim, 4 * dim, 2, 2, 0, bias=False),
+        ] + [
+            ConvTransBlock(
+                2 * dim,
+                2 * dim,
+                self.head_dim,
+                self.window_size,
+                dpr[i + begin],
+                "W" if not i % 2 else "SW",
+                input_resolution // 4,
+            )
+            for i in range(config[4])
+        ]
+        begin += config[4]
+        self.m_up2 = [
+            nn.ConvTranspose2d(4 * dim, 2 * dim, 2, 2, 0, bias=False),
+        ] + [
+            ConvTransBlock(
+                dim,
+                dim,
+                self.head_dim,
+                self.window_size,
+                dpr[i + begin],
+                "W" if not i % 2 else "SW",
+                input_resolution // 2,
+            )
+            for i in range(config[5])
+        ]
+        begin += config[5]
+        self.m_up1 = [
+            nn.ConvTranspose2d(2 * dim, dim, 2, 2, 0, bias=False),
+        ] + [
+            ConvTransBlock(
+                dim // 2,
+                dim // 2,
+                self.head_dim,
+                self.window_size,
+                dpr[i + begin],
+                "W" if not i % 2 else "SW",
+                input_resolution,
+            )
+            for i in range(config[6])
+        ]
+        self.m_tail = [nn.Conv2d(dim, in_nc, 3, 1, 1, bias=False)]
+        self.m_head = nn.Sequential(*self.m_head)
+        self.m_down1 = nn.Sequential(*self.m_down1)
+        self.m_down2 = nn.Sequential(*self.m_down2)
+        self.m_down3 = nn.Sequential(*self.m_down3)
+        self.m_body = nn.Sequential(*self.m_body)
+        self.m_up3 = nn.Sequential(*self.m_up3)
+        self.m_up2 = nn.Sequential(*self.m_up2)
+        self.m_up1 = nn.Sequential(*self.m_up1)
+        self.m_tail = nn.Sequential(*self.m_tail)
+        # self.apply(self._init_weights)
+        self.load_state_dict(state_dict, strict=True)
+    def check_image_size(self, x):
+        _, _, h, w = x.size()
+        mod_pad_h = (64 - h % 64) % 64
+        mod_pad_w = (64 - w % 64) % 64
+        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect")
+        return x
+    def forward(self, x0):
+        h, w = x0.size()[-2:]
+        x0 = self.check_image_size(x0)
+        x1 = self.m_head(x0)
+        x2 = self.m_down1(x1)
+        x3 = self.m_down2(x2)
+        x4 = self.m_down3(x3)
+        x = self.m_body(x4)
+        x = self.m_up3(x + x4)
+        x = self.m_up2(x + x3)
+        x = self.m_up1(x + x2)
+        x = self.m_tail(x + x1)
+        x = x[:, :, :h, :w]
+        return x
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=0.02)
+            if m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SPSR.py ADDED Viewed

	@@ -0,0 +1,383 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from . import block as B
+class Get_gradient_nopadding(nn.Module):
+    def __init__(self):
+        super(Get_gradient_nopadding, self).__init__()
+        kernel_v = [[0, -1, 0], [0, 0, 0], [0, 1, 0]]
+        kernel_h = [[0, 0, 0], [-1, 0, 1], [0, 0, 0]]
+        kernel_h = torch.FloatTensor(kernel_h).unsqueeze(0).unsqueeze(0)
+        kernel_v = torch.FloatTensor(kernel_v).unsqueeze(0).unsqueeze(0)
+        self.weight_h = nn.Parameter(data=kernel_h, requires_grad=False)  # type: ignore
+        self.weight_v = nn.Parameter(data=kernel_v, requires_grad=False)  # type: ignore
+    def forward(self, x):
+        x_list = []
+        for i in range(x.shape[1]):
+            x_i = x[:, i]
+            x_i_v = F.conv2d(x_i.unsqueeze(1), self.weight_v, padding=1)
+            x_i_h = F.conv2d(x_i.unsqueeze(1), self.weight_h, padding=1)
+            x_i = torch.sqrt(torch.pow(x_i_v, 2) + torch.pow(x_i_h, 2) + 1e-6)
+            x_list.append(x_i)
+        x = torch.cat(x_list, dim=1)
+        return x
+class SPSRNet(nn.Module):
+    def __init__(
+        self,
+        state_dict,
+        norm=None,
+        act: str = "leakyrelu",
+        upsampler: str = "upconv",
+        mode: B.ConvMode = "CNA",
+    ):
+        super(SPSRNet, self).__init__()
+        self.model_arch = "SPSR"
+        self.sub_type = "SR"
+        self.state = state_dict
+        self.norm = norm
+        self.act = act
+        self.upsampler = upsampler
+        self.mode = mode
+        self.num_blocks = self.get_num_blocks()
+        self.in_nc: int = self.state["model.0.weight"].shape[1]
+        self.out_nc: int = self.state["f_HR_conv1.0.bias"].shape[0]
+        self.scale = self.get_scale(4)
+        self.num_filters: int = self.state["model.0.weight"].shape[0]
+        self.supports_fp16 = True
+        self.supports_bfp16 = True
+        self.min_size_restriction = None
+        n_upscale = int(math.log(self.scale, 2))
+        if self.scale == 3:
+            n_upscale = 1
+        fea_conv = B.conv_block(
+            self.in_nc, self.num_filters, kernel_size=3, norm_type=None, act_type=None
+        )
+        rb_blocks = [
+            B.RRDB(
+                self.num_filters,
+                kernel_size=3,
+                gc=32,
+                stride=1,
+                bias=True,
+                pad_type="zero",
+                norm_type=norm,
+                act_type=act,
+                mode="CNA",
+            )
+            for _ in range(self.num_blocks)
+        ]
+        LR_conv = B.conv_block(
+            self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=norm,
+            act_type=None,
+            mode=mode,
+        )
+        if upsampler == "upconv":
+            upsample_block = B.upconv_block
+        elif upsampler == "pixelshuffle":
+            upsample_block = B.pixelshuffle_block
+        else:
+            raise NotImplementedError(f"upsample mode [{upsampler}] is not found")
+        if self.scale == 3:
+            a_upsampler = upsample_block(
+                self.num_filters, self.num_filters, 3, act_type=act
+            )
+        else:
+            a_upsampler = [
+                upsample_block(self.num_filters, self.num_filters, act_type=act)
+                for _ in range(n_upscale)
+            ]
+        self.HR_conv0_new = B.conv_block(
+            self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=act,
+        )
+        self.HR_conv1_new = B.conv_block(
+            self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=None,
+        )
+        self.model = B.sequential(
+            fea_conv,
+            B.ShortcutBlockSPSR(B.sequential(*rb_blocks, LR_conv)),
+            *a_upsampler,
+            self.HR_conv0_new,
+        )
+        self.get_g_nopadding = Get_gradient_nopadding()
+        self.b_fea_conv = B.conv_block(
+            self.in_nc, self.num_filters, kernel_size=3, norm_type=None, act_type=None
+        )
+        self.b_concat_1 = B.conv_block(
+            2 * self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=None,
+        )
+        self.b_block_1 = B.RRDB(
+            self.num_filters * 2,
+            kernel_size=3,
+            gc=32,
+            stride=1,
+            bias=True,
+            pad_type="zero",
+            norm_type=norm,
+            act_type=act,
+            mode="CNA",
+        )
+        self.b_concat_2 = B.conv_block(
+            2 * self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=None,
+        )
+        self.b_block_2 = B.RRDB(
+            self.num_filters * 2,
+            kernel_size=3,
+            gc=32,
+            stride=1,
+            bias=True,
+            pad_type="zero",
+            norm_type=norm,
+            act_type=act,
+            mode="CNA",
+        )
+        self.b_concat_3 = B.conv_block(
+            2 * self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=None,
+        )
+        self.b_block_3 = B.RRDB(
+            self.num_filters * 2,
+            kernel_size=3,
+            gc=32,
+            stride=1,
+            bias=True,
+            pad_type="zero",
+            norm_type=norm,
+            act_type=act,
+            mode="CNA",
+        )
+        self.b_concat_4 = B.conv_block(
+            2 * self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=None,
+        )
+        self.b_block_4 = B.RRDB(
+            self.num_filters * 2,
+            kernel_size=3,
+            gc=32,
+            stride=1,
+            bias=True,
+            pad_type="zero",
+            norm_type=norm,
+            act_type=act,
+            mode="CNA",
+        )
+        self.b_LR_conv = B.conv_block(
+            self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=norm,
+            act_type=None,
+            mode=mode,
+        )
+        if upsampler == "upconv":
+            upsample_block = B.upconv_block
+        elif upsampler == "pixelshuffle":
+            upsample_block = B.pixelshuffle_block
+        else:
+            raise NotImplementedError(f"upsample mode [{upsampler}] is not found")
+        if self.scale == 3:
+            b_upsampler = upsample_block(
+                self.num_filters, self.num_filters, 3, act_type=act
+            )
+        else:
+            b_upsampler = [
+                upsample_block(self.num_filters, self.num_filters, act_type=act)
+                for _ in range(n_upscale)
+            ]
+        b_HR_conv0 = B.conv_block(
+            self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=act,
+        )
+        b_HR_conv1 = B.conv_block(
+            self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=None,
+        )
+        self.b_module = B.sequential(*b_upsampler, b_HR_conv0, b_HR_conv1)
+        self.conv_w = B.conv_block(
+            self.num_filters, self.out_nc, kernel_size=1, norm_type=None, act_type=None
+        )
+        self.f_concat = B.conv_block(
+            self.num_filters * 2,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=None,
+        )
+        self.f_block = B.RRDB(
+            self.num_filters * 2,
+            kernel_size=3,
+            gc=32,
+            stride=1,
+            bias=True,
+            pad_type="zero",
+            norm_type=norm,
+            act_type=act,
+            mode="CNA",
+        )
+        self.f_HR_conv0 = B.conv_block(
+            self.num_filters,
+            self.num_filters,
+            kernel_size=3,
+            norm_type=None,
+            act_type=act,
+        )
+        self.f_HR_conv1 = B.conv_block(
+            self.num_filters, self.out_nc, kernel_size=3, norm_type=None, act_type=None
+        )
+        self.load_state_dict(self.state, strict=False)
+    def get_scale(self, min_part: int = 4) -> int:
+        n = 0
+        for part in list(self.state):
+            parts = part.split(".")
+            if len(parts) == 3:
+                part_num = int(parts[1])
+                if part_num > min_part and parts[0] == "model" and parts[2] == "weight":
+                    n += 1
+        return 2**n
+    def get_num_blocks(self) -> int:
+        nb = 0
+        for part in list(self.state):
+            parts = part.split(".")
+            n_parts = len(parts)
+            if n_parts == 5 and parts[2] == "sub":
+                nb = int(parts[3])
+        return nb
+    def forward(self, x):
+        x_grad = self.get_g_nopadding(x)
+        x = self.model[0](x)
+        x, block_list = self.model[1](x)
+        x_ori = x
+        for i in range(5):
+            x = block_list[i](x)
+        x_fea1 = x
+        for i in range(5):
+            x = block_list[i + 5](x)
+        x_fea2 = x
+        for i in range(5):
+            x = block_list[i + 10](x)
+        x_fea3 = x
+        for i in range(5):
+            x = block_list[i + 15](x)
+        x_fea4 = x
+        x = block_list[20:](x)
+        # short cut
+        x = x_ori + x
+        x = self.model[2:](x)
+        x = self.HR_conv1_new(x)
+        x_b_fea = self.b_fea_conv(x_grad)
+        x_cat_1 = torch.cat([x_b_fea, x_fea1], dim=1)
+        x_cat_1 = self.b_block_1(x_cat_1)
+        x_cat_1 = self.b_concat_1(x_cat_1)
+        x_cat_2 = torch.cat([x_cat_1, x_fea2], dim=1)
+        x_cat_2 = self.b_block_2(x_cat_2)
+        x_cat_2 = self.b_concat_2(x_cat_2)
+        x_cat_3 = torch.cat([x_cat_2, x_fea3], dim=1)
+        x_cat_3 = self.b_block_3(x_cat_3)
+        x_cat_3 = self.b_concat_3(x_cat_3)
+        x_cat_4 = torch.cat([x_cat_3, x_fea4], dim=1)
+        x_cat_4 = self.b_block_4(x_cat_4)
+        x_cat_4 = self.b_concat_4(x_cat_4)
+        x_cat_4 = self.b_LR_conv(x_cat_4)
+        # short cut
+        x_cat_4 = x_cat_4 + x_b_fea
+        x_branch = self.b_module(x_cat_4)
+        # x_out_branch = self.conv_w(x_branch)
+        ########
+        x_branch_d = x_branch
+        x_f_cat = torch.cat([x_branch_d, x], dim=1)
+        x_f_cat = self.f_block(x_f_cat)
+        x_out = self.f_concat(x_f_cat)
+        x_out = self.f_HR_conv0(x_out)
+        x_out = self.f_HR_conv1(x_out)
+        #########
+        # return x_out_branch, x_out, x_grad
+        return x_out

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SRVGG.py ADDED Viewed

	@@ -0,0 +1,114 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import math
+import torch.nn as nn
+import torch.nn.functional as F
+class SRVGGNetCompact(nn.Module):
+    """A compact VGG-style network structure for super-resolution.
+    It is a compact network structure, which performs upsampling in the last layer and no convolution is
+    conducted on the HR feature space.
+    Args:
+        num_in_ch (int): Channel number of inputs. Default: 3.
+        num_out_ch (int): Channel number of outputs. Default: 3.
+        num_feat (int): Channel number of intermediate features. Default: 64.
+        num_conv (int): Number of convolution layers in the body network. Default: 16.
+        upscale (int): Upsampling factor. Default: 4.
+        act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu.
+    """
+    def __init__(
+        self,
+        state_dict,
+        act_type: str = "prelu",
+    ):
+        super(SRVGGNetCompact, self).__init__()
+        self.model_arch = "SRVGG (RealESRGAN)"
+        self.sub_type = "SR"
+        self.act_type = act_type
+        self.state = state_dict
+        if "params" in self.state:
+            self.state = self.state["params"]
+        self.key_arr = list(self.state.keys())
+        self.in_nc = self.get_in_nc()
+        self.num_feat = self.get_num_feats()
+        self.num_conv = self.get_num_conv()
+        self.out_nc = self.in_nc  # :(
+        self.pixelshuffle_shape = None  # Defined in get_scale()
+        self.scale = self.get_scale()
+        self.supports_fp16 = True
+        self.supports_bfp16 = True
+        self.min_size_restriction = None
+        self.body = nn.ModuleList()
+        # the first conv
+        self.body.append(nn.Conv2d(self.in_nc, self.num_feat, 3, 1, 1))
+        # the first activation
+        if act_type == "relu":
+            activation = nn.ReLU(inplace=True)
+        elif act_type == "prelu":
+            activation = nn.PReLU(num_parameters=self.num_feat)
+        elif act_type == "leakyrelu":
+            activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+        self.body.append(activation)  # type: ignore
+        # the body structure
+        for _ in range(self.num_conv):
+            self.body.append(nn.Conv2d(self.num_feat, self.num_feat, 3, 1, 1))
+            # activation
+            if act_type == "relu":
+                activation = nn.ReLU(inplace=True)
+            elif act_type == "prelu":
+                activation = nn.PReLU(num_parameters=self.num_feat)
+            elif act_type == "leakyrelu":
+                activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+            self.body.append(activation)  # type: ignore
+        # the last conv
+        self.body.append(nn.Conv2d(self.num_feat, self.pixelshuffle_shape, 3, 1, 1))  # type: ignore
+        # upsample
+        self.upsampler = nn.PixelShuffle(self.scale)
+        self.load_state_dict(self.state, strict=False)
+    def get_num_conv(self) -> int:
+        return (int(self.key_arr[-1].split(".")[1]) - 2) // 2
+    def get_num_feats(self) -> int:
+        return self.state[self.key_arr[0]].shape[0]
+    def get_in_nc(self) -> int:
+        return self.state[self.key_arr[0]].shape[1]
+    def get_scale(self) -> int:
+        self.pixelshuffle_shape = self.state[self.key_arr[-1]].shape[0]
+        # Assume out_nc is the same as in_nc
+        # I cant think of a better way to do that
+        self.out_nc = self.in_nc
+        scale = math.sqrt(self.pixelshuffle_shape / self.out_nc)
+        if scale - int(scale) > 0:
+            print(
+                "out_nc is probably different than in_nc, scale calculation might be wrong"
+            )
+        scale = int(scale)
+        return scale
+    def forward(self, x):
+        out = x
+        for i in range(0, len(self.body)):
+            out = self.body[i](out)
+        out = self.upsampler(out)
+        # add the nearest upsampled image, so that the network learns the residual
+        base = F.interpolate(x, scale_factor=self.scale, mode="nearest")
+        out += base
+        return out

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SwiftSRGAN.py ADDED Viewed

	@@ -0,0 +1,161 @@

+# From https://github.com/Koushik0901/Swift-SRGAN/blob/master/swift-srgan/models.py
+import torch
+from torch import nn
+class SeperableConv2d(nn.Module):
+    def __init__(
+        self, in_channels, out_channels, kernel_size, stride=1, padding=1, bias=True
+    ):
+        super(SeperableConv2d, self).__init__()
+        self.depthwise = nn.Conv2d(
+            in_channels,
+            in_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            groups=in_channels,
+            bias=bias,
+            padding=padding,
+        )
+        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias)
+    def forward(self, x):
+        return self.pointwise(self.depthwise(x))
+class ConvBlock(nn.Module):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        use_act=True,
+        use_bn=True,
+        discriminator=False,
+        **kwargs,
+    ):
+        super(ConvBlock, self).__init__()
+        self.use_act = use_act
+        self.cnn = SeperableConv2d(in_channels, out_channels, **kwargs, bias=not use_bn)
+        self.bn = nn.BatchNorm2d(out_channels) if use_bn else nn.Identity()
+        self.act = (
+            nn.LeakyReLU(0.2, inplace=True)
+            if discriminator
+            else nn.PReLU(num_parameters=out_channels)
+        )
+    def forward(self, x):
+        return self.act(self.bn(self.cnn(x))) if self.use_act else self.bn(self.cnn(x))
+class UpsampleBlock(nn.Module):
+    def __init__(self, in_channels, scale_factor):
+        super(UpsampleBlock, self).__init__()
+        self.conv = SeperableConv2d(
+            in_channels,
+            in_channels * scale_factor**2,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+        )
+        self.ps = nn.PixelShuffle(
+            scale_factor
+        )  # (in_channels * 4, H, W) -> (in_channels, H*2, W*2)
+        self.act = nn.PReLU(num_parameters=in_channels)
+    def forward(self, x):
+        return self.act(self.ps(self.conv(x)))
+class ResidualBlock(nn.Module):
+    def __init__(self, in_channels):
+        super(ResidualBlock, self).__init__()
+        self.block1 = ConvBlock(
+            in_channels, in_channels, kernel_size=3, stride=1, padding=1
+        )
+        self.block2 = ConvBlock(
+            in_channels, in_channels, kernel_size=3, stride=1, padding=1, use_act=False
+        )
+    def forward(self, x):
+        out = self.block1(x)
+        out = self.block2(out)
+        return out + x
+class Generator(nn.Module):
+    """Swift-SRGAN Generator
+    Args:
+        in_channels (int): number of input image channels.
+        num_channels (int): number of hidden channels.
+        num_blocks (int): number of residual blocks.
+        upscale_factor (int): factor to upscale the image [2x, 4x, 8x].
+    Returns:
+        torch.Tensor: super resolution image
+    """
+    def __init__(
+        self,
+        state_dict,
+    ):
+        super(Generator, self).__init__()
+        self.model_arch = "Swift-SRGAN"
+        self.sub_type = "SR"
+        self.state = state_dict
+        if "model" in self.state:
+            self.state = self.state["model"]
+        self.in_nc: int = self.state["initial.cnn.depthwise.weight"].shape[0]
+        self.out_nc: int = self.state["final_conv.pointwise.weight"].shape[0]
+        self.num_filters: int = self.state["initial.cnn.pointwise.weight"].shape[0]
+        self.num_blocks = len(
+            set([x.split(".")[1] for x in self.state.keys() if "residual" in x])
+        )
+        self.scale: int = 2 ** len(
+            set([x.split(".")[1] for x in self.state.keys() if "upsampler" in x])
+        )
+        in_channels = self.in_nc
+        num_channels = self.num_filters
+        num_blocks = self.num_blocks
+        upscale_factor = self.scale
+        self.supports_fp16 = True
+        self.supports_bfp16 = True
+        self.min_size_restriction = None
+        self.initial = ConvBlock(
+            in_channels, num_channels, kernel_size=9, stride=1, padding=4, use_bn=False
+        )
+        self.residual = nn.Sequential(
+            *[ResidualBlock(num_channels) for _ in range(num_blocks)]
+        )
+        self.convblock = ConvBlock(
+            num_channels,
+            num_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            use_act=False,
+        )
+        self.upsampler = nn.Sequential(
+            *[
+                UpsampleBlock(num_channels, scale_factor=2)
+                for _ in range(upscale_factor // 2)
+            ]
+        )
+        self.final_conv = SeperableConv2d(
+            num_channels, in_channels, kernel_size=9, stride=1, padding=4
+        )
+        self.load_state_dict(self.state, strict=False)
+    def forward(self, x):
+        initial = self.initial(x)
+        x = self.residual(initial)
+        x = self.convblock(x) + initial
+        x = self.upsampler(x)
+        return (torch.tanh(self.final_conv(x)) + 1) / 2

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/Swin2SR.py ADDED Viewed

	@@ -0,0 +1,1377 @@

+# pylint: skip-file
+# -----------------------------------------------------------------------------------
+# Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration, https://arxiv.org/abs/2209.11345
+# Written by Conde and Choi et al.
+# From: https://raw.githubusercontent.com/mv-lab/swin2sr/main/models/network_swin2sr.py
+# -----------------------------------------------------------------------------------
+import math
+import re
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+# Originally from the timm package
+from .timm.drop import DropPath
+from .timm.helpers import to_2tuple
+from .timm.weight_init import trunc_normal_
+class Mlp(nn.Module):
+    def __init__(
+        self,
+        in_features,
+        hidden_features=None,
+        out_features=None,
+        act_layer=nn.GELU,
+        drop=0.0,
+    ):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+def window_partition(x, window_size):
+    """
+    Args:
+        x: (B, H, W, C)
+        window_size (int): window size
+    Returns:
+        windows: (num_windows*B, window_size, window_size, C)
+    """
+    B, H, W, C = x.shape
+    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
+    windows = (
+        x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
+    )
+    return windows
+def window_reverse(windows, window_size, H, W):
+    """
+    Args:
+        windows: (num_windows*B, window_size, window_size, C)
+        window_size (int): Window size
+        H (int): Height of image
+        W (int): Width of image
+    Returns:
+        x: (B, H, W, C)
+    """
+    B = int(windows.shape[0] / (H * W / window_size / window_size))
+    x = windows.view(
+        B, H // window_size, W // window_size, window_size, window_size, -1
+    )
+    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
+    return x
+class WindowAttention(nn.Module):
+    r"""Window based multi-head self attention (W-MSA) module with relative position bias.
+    It supports both of shifted and non-shifted window.
+    Attention logits are the cosine similarity of q and k multiplied by a learned, clamped
+    per-head scale, and the relative position bias is produced by a small MLP (``cpb_mlp``)
+    evaluated on a log-spaced table of relative coordinates.
+    Args:
+        dim (int): Number of input channels.
+        window_size (tuple[int]): The height and width of the window.
+        num_heads (int): Number of attention heads.
+        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
+        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
+        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
+        pretrained_window_size (tuple[int]): The height and width of the window in pre-training.
+    """
+    def __init__(
+        self,
+        dim,
+        window_size,
+        num_heads,
+        qkv_bias=True,
+        attn_drop=0.0,
+        proj_drop=0.0,
+        # NOTE(review): mutable list default; it is stored and read but never mutated in this class.
+        pretrained_window_size=[0, 0],
+    ):
+        super().__init__()
+        self.dim = dim
+        self.window_size = window_size  # Wh, Ww
+        self.pretrained_window_size = pretrained_window_size
+        self.num_heads = num_heads
+        # Learnable per-head logit scale, stored in log space and initialised to log(10).
+        self.logit_scale = nn.Parameter(torch.log(10 * torch.ones((num_heads, 1, 1))), requires_grad=True)  # type: ignore
+        # mlp to generate continuous relative position bias
+        self.cpb_mlp = nn.Sequential(
+            nn.Linear(2, 512, bias=True),
+            nn.ReLU(inplace=True),
+            nn.Linear(512, num_heads, bias=False),
+        )
+        # get relative_coords_table
+        relative_coords_h = torch.arange(
+            -(self.window_size[0] - 1), self.window_size[0], dtype=torch.float32
+        )
+        relative_coords_w = torch.arange(
+            -(self.window_size[1] - 1), self.window_size[1], dtype=torch.float32
+        )
+        relative_coords_table = (
+            torch.stack(torch.meshgrid([relative_coords_h, relative_coords_w]))
+            .permute(1, 2, 0)
+            .contiguous()
+            .unsqueeze(0)
+        )  # 1, 2*Wh-1, 2*Ww-1, 2
+        # Scale offsets to [-1, 1] using the pre-training window size when one is
+        # given, otherwise the current window size.
+        # NOTE(review): a window dimension of 1 makes the divisor 0 here.
+        if pretrained_window_size[0] > 0:
+            relative_coords_table[:, :, :, 0] /= pretrained_window_size[0] - 1
+            relative_coords_table[:, :, :, 1] /= pretrained_window_size[1] - 1
+        else:
+            relative_coords_table[:, :, :, 0] /= self.window_size[0] - 1
+            relative_coords_table[:, :, :, 1] /= self.window_size[1] - 1
+        relative_coords_table *= 8  # normalize to -8, 8
+        # Signed log2 compression: sign(x) * log2(|x| + 1) / log2(8).
+        relative_coords_table = (
+            torch.sign(relative_coords_table)
+            * torch.log2(torch.abs(relative_coords_table) + 1.0)
+            / np.log2(8)
+        )
+        self.register_buffer("relative_coords_table", relative_coords_table)
+        # get pair-wise relative position index for each token inside the window
+        coords_h = torch.arange(self.window_size[0])
+        coords_w = torch.arange(self.window_size[1])
+        coords = torch.stack(torch.meshgrid([coords_h, coords_w]))  # 2, Wh, Ww
+        coords_flatten = torch.flatten(coords, 1)  # 2, Wh*Ww
+        relative_coords = (
+            coords_flatten[:, :, None] - coords_flatten[:, None, :]
+        )  # 2, Wh*Ww, Wh*Ww
+        relative_coords = relative_coords.permute(
+            1, 2, 0
+        ).contiguous()  # Wh*Ww, Wh*Ww, 2
+        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0
+        relative_coords[:, :, 1] += self.window_size[1] - 1
+        # Row offset times row length plus column offset gives a flat index
+        # into the (2*Wh-1) x (2*Ww-1) table.
+        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
+        relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
+        self.register_buffer("relative_position_index", relative_position_index)
+        # The projection itself has no bias; q and v biases are separate
+        # parameters and the k slot is filled with zeros in forward().
+        self.qkv = nn.Linear(dim, dim * 3, bias=False)
+        if qkv_bias:
+            self.q_bias = nn.Parameter(torch.zeros(dim))  # type: ignore
+            self.v_bias = nn.Parameter(torch.zeros(dim))  # type: ignore
+        else:
+            self.q_bias = None
+            self.v_bias = None
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+        self.softmax = nn.Softmax(dim=-1)
+    def forward(self, x, mask=None):
+        """
+        Args:
+            x: input features with shape of (num_windows*B, N, C)
+            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
+        Returns:
+            Tensor reshaped to (num_windows*B, N, C) and passed through the output projection.
+        """
+        B_, N, C = x.shape
+        qkv_bias = None
+        if self.q_bias is not None:
+            # Assemble the full bias as [q_bias, zeros for k, v_bias].
+            qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias))  # type: ignore
+        qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias)
+        qkv = qkv.reshape(B_, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+        q, k, v = (
+            qkv[0],
+            qkv[1],
+            qkv[2],
+        )  # make torchscript happy (cannot use tensor as tuple)
+        # cosine attention
+        attn = F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1)
+        # Cap the log-scale at log(100) before exponentiating, so the multiplier never exceeds 100.
+        logit_scale = torch.clamp(
+            self.logit_scale,
+            max=torch.log(torch.tensor(1.0 / 0.01)).to(self.logit_scale.device),
+        ).exp()
+        attn = attn * logit_scale
+        # Evaluate the bias MLP on every table entry, then gather per token pair.
+        relative_position_bias_table = self.cpb_mlp(self.relative_coords_table).view(
+            -1, self.num_heads
+        )
+        relative_position_bias = relative_position_bias_table[self.relative_position_index.view(-1)].view(  # type: ignore
+            self.window_size[0] * self.window_size[1],
+            self.window_size[0] * self.window_size[1],
+            -1,
+        )  # Wh*Ww,Wh*Ww,nH
+        relative_position_bias = relative_position_bias.permute(
+            2, 0, 1
+        ).contiguous()  # nH, Wh*Ww, Wh*Ww
+        # The sigmoid keeps the bias inside (0, 16).
+        relative_position_bias = 16 * torch.sigmoid(relative_position_bias)
+        attn = attn + relative_position_bias.unsqueeze(0)
+        if mask is not None:
+            nW = mask.shape[0]
+            # Expose the window axis so the per-window mask broadcasts over batch and heads.
+            attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(
+                1
+            ).unsqueeze(0)
+            attn = attn.view(-1, self.num_heads, N, N)
+            attn = self.softmax(attn)
+        else:
+            attn = self.softmax(attn)
+        attn = self.attn_drop(attn)
+        x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+    def extra_repr(self) -> str:
+        # Extra fields shown in the module's printed representation.
+        return (
+            f"dim={self.dim}, window_size={self.window_size}, "
+            f"pretrained_window_size={self.pretrained_window_size}, num_heads={self.num_heads}"
+        )
+    def flops(self, N):
+        # calculate flops for 1 window with token length of N
+        flops = 0
+        # qkv = self.qkv(x)
+        flops += N * self.dim * 3 * self.dim
+        # attn = (q @ k.transpose(-2, -1))
+        flops += self.num_heads * N * (self.dim // self.num_heads) * N
+        #  x = (attn @ v)
+        flops += self.num_heads * N * N * (self.dim // self.num_heads)
+        # x = self.proj(x)
+        flops += N * self.dim * self.dim
+        return flops
+class SwinTransformerBlock(nn.Module):
+    r"""Swin Transformer Block.
+    Applies (optionally shifted) window attention and then an MLP. In both residual
+    branches the layer norm is applied to the branch output before it is added back.
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        num_heads (int): Number of attention heads.
+        window_size (int): Window size.
+        shift_size (int): Shift size for SW-MSA.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float, optional): Stochastic depth rate. Default: 0.0
+        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+        pretrained_window_size (int): Window size in pre-training.
+    """
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        num_heads,
+        window_size=7,
+        shift_size=0,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        act_layer=nn.GELU,
+        norm_layer=nn.LayerNorm,
+        pretrained_window_size=0,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.num_heads = num_heads
+        self.window_size = window_size
+        self.shift_size = shift_size
+        self.mlp_ratio = mlp_ratio
+        if min(self.input_resolution) <= self.window_size:
+            # if window size is larger than input resolution, we don't partition windows
+            self.shift_size = 0
+            self.window_size = min(self.input_resolution)
+        assert (
+            0 <= self.shift_size < self.window_size
+        ), "shift_size must in 0-window_size"
+        self.norm1 = norm_layer(dim)
+        self.attn = WindowAttention(
+            dim,
+            window_size=to_2tuple(self.window_size),
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            attn_drop=attn_drop,
+            proj_drop=drop,
+            pretrained_window_size=to_2tuple(pretrained_window_size),
+        )
+        # Stochastic depth is only instantiated for a positive rate.
+        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(
+            in_features=dim,
+            hidden_features=mlp_hidden_dim,
+            act_layer=act_layer,
+            drop=drop,
+        )
+        # Precompute the mask for the nominal resolution; forward() reuses it
+        # whenever the runtime size equals input_resolution.
+        if self.shift_size > 0:
+            attn_mask = self.calculate_mask(self.input_resolution)
+        else:
+            attn_mask = None
+        self.register_buffer("attn_mask", attn_mask)
+    def calculate_mask(self, x_size):
+        """Build the additive attention mask for an ``(H, W)`` feature map.
+        Positions are labelled by region; within each window, pairs of tokens with
+        different labels get -100.0 and pairs with the same label get 0.0.
+        Returns:
+            Tensor of shape (num_windows, window_size**2, window_size**2).
+        """
+        # calculate attention mask for SW-MSA
+        H, W = x_size
+        img_mask = torch.zeros((1, H, W, 1))  # 1 H W 1
+        h_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        w_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        # Give each of the 3 x 3 regions its own integer label.
+        cnt = 0
+        for h in h_slices:
+            for w in w_slices:
+                img_mask[:, h, w, :] = cnt
+                cnt += 1
+        mask_windows = window_partition(
+            img_mask, self.window_size
+        )  # nW, window_size, window_size, 1
+        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
+        # A non-zero label difference marks a token pair from different regions.
+        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
+        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(
+            attn_mask == 0, float(0.0)
+        )
+        return attn_mask
+    def forward(self, x, x_size):
+        """Apply window attention and the MLP to a token sequence.
+        Args:
+            x: Tokens of shape (B, L, C); viewed as (B, H, W, C), so L must equal H * W.
+            x_size: The ``(H, W)`` of the feature map that ``x`` flattens.
+        Returns:
+            Tensor of shape (B, H * W, C).
+        """
+        H, W = x_size
+        B, L, C = x.shape
+        # assert L == H * W, "input feature has wrong size"
+        shortcut = x
+        x = x.view(B, H, W, C)
+        # cyclic shift
+        if self.shift_size > 0:
+            shifted_x = torch.roll(
+                x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)
+            )
+        else:
+            shifted_x = x
+        # partition windows
+        x_windows = window_partition(
+            shifted_x, self.window_size
+        )  # nW*B, window_size, window_size, C
+        x_windows = x_windows.view(
+            -1, self.window_size * self.window_size, C
+        )  # nW*B, window_size*window_size, C
+        # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size
+        if self.input_resolution == x_size:
+            attn_windows = self.attn(
+                x_windows, mask=self.attn_mask
+            )  # nW*B, window_size*window_size, C
+        else:
+            # A different runtime size needs a freshly built mask; this branch
+            # computes one regardless of shift_size.
+            attn_windows = self.attn(
+                x_windows, mask=self.calculate_mask(x_size).to(x.device)
+            )
+        # merge windows
+        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
+        shifted_x = window_reverse(attn_windows, self.window_size, H, W)  # B H' W' C
+        # reverse cyclic shift
+        if self.shift_size > 0:
+            x = torch.roll(
+                shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)
+            )
+        else:
+            x = shifted_x
+        x = x.view(B, H * W, C)
+        # The norm is applied to the attention output, not to its input.
+        x = shortcut + self.drop_path(self.norm1(x))
+        # FFN
+        x = x + self.drop_path(self.norm2(self.mlp(x)))
+        return x
+    def extra_repr(self) -> str:
+        # Extra fields shown in the module's printed representation.
+        return (
+            f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, "
+            f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}"
+        )
+    def flops(self):
+        # Operation-count estimate at the nominal input_resolution.
+        flops = 0
+        H, W = self.input_resolution
+        # norm1
+        flops += self.dim * H * W
+        # W-MSA/SW-MSA
+        nW = H * W / self.window_size / self.window_size
+        flops += nW * self.attn.flops(self.window_size * self.window_size)
+        # mlp
+        flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio
+        # norm2
+        flops += self.dim * H * W
+        return flops
+class PatchMerging(nn.Module):
+    r"""Patch Merging Layer.
+    Args:
+        input_resolution (tuple[int]): Resolution of input feature.
+        dim (int): Number of input channels.
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+    def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.input_resolution = input_resolution
+        self.dim = dim
+        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
+        self.norm = norm_layer(2 * dim)
+    def forward(self, x):
+        """
+        x: B, H*W, C
+        """
+        H, W = self.input_resolution
+        B, L, C = x.shape
+        assert L == H * W, "input feature has wrong size"
+        assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even."
+        x = x.view(B, H, W, C)
+        x0 = x[:, 0::2, 0::2, :]  # B H/2 W/2 C
+        x1 = x[:, 1::2, 0::2, :]  # B H/2 W/2 C
+        x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
+        x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
+        x = torch.cat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
+        x = x.view(B, -1, 4 * C)  # B H/2*W/2 4*C
+        x = self.reduction(x)
+        x = self.norm(x)
+        return x
+    def extra_repr(self) -> str:
+        return f"input_resolution={self.input_resolution}, dim={self.dim}"
+    def flops(self):
+        H, W = self.input_resolution
+        flops = (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
+        flops += H * W * self.dim // 2
+        return flops
+class BasicLayer(nn.Module):
+    """A basic Swin Transformer layer for one stage.
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        depth (int): Number of blocks.
+        num_heads (int): Number of attention heads.
+        window_size (int): Local window size.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+        pretrained_window_size (int): Local window size in pre-training.
+    """
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        depth,
+        num_heads,
+        window_size,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        norm_layer=nn.LayerNorm,
+        downsample=None,
+        use_checkpoint=False,
+        pretrained_window_size=0,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.depth = depth
+        self.use_checkpoint = use_checkpoint
+        # build blocks
+        self.blocks = nn.ModuleList(
+            [
+                SwinTransformerBlock(
+                    dim=dim,
+                    input_resolution=input_resolution,
+                    num_heads=num_heads,
+                    window_size=window_size,
+                    shift_size=0 if (i % 2 == 0) else window_size // 2,
+                    mlp_ratio=mlp_ratio,
+                    qkv_bias=qkv_bias,
+                    drop=drop,
+                    attn_drop=attn_drop,
+                    drop_path=drop_path[i]
+                    if isinstance(drop_path, list)
+                    else drop_path,
+                    norm_layer=norm_layer,
+                    pretrained_window_size=pretrained_window_size,
+                )
+                for i in range(depth)
+            ]
+        )
+        # patch merging layer
+        if downsample is not None:
+            self.downsample = downsample(
+                input_resolution, dim=dim, norm_layer=norm_layer
+            )
+        else:
+            self.downsample = None
+    def forward(self, x, x_size):
+        for blk in self.blocks:
+            if self.use_checkpoint:
+                x = checkpoint.checkpoint(blk, x, x_size)
+            else:
+                x = blk(x, x_size)
+        if self.downsample is not None:
+            x = self.downsample(x)
+        return x
+    def extra_repr(self) -> str:
+        return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}"
+    def flops(self):
+        flops = 0
+        for blk in self.blocks:
+            flops += blk.flops()  # type: ignore
+        if self.downsample is not None:
+            flops += self.downsample.flops()
+        return flops
+    def _init_respostnorm(self):
+        for blk in self.blocks:
+            nn.init.constant_(blk.norm1.bias, 0)  # type: ignore
+            nn.init.constant_(blk.norm1.weight, 0)  # type: ignore
+            nn.init.constant_(blk.norm2.bias, 0)  # type: ignore
+            nn.init.constant_(blk.norm2.weight, 0)  # type: ignore
+class PatchEmbed(nn.Module):
+    r"""Image to Patch Embedding
+    Args:
+        img_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Module, optional): Normalization layer. Default: None
+    """
+    def __init__(
+        self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None
+    ):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]  # type: ignore
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+        self.proj = nn.Conv2d(
+            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size  # type: ignore
+        )
+        if norm_layer is not None:
+            self.norm = norm_layer(embed_dim)
+        else:
+            self.norm = None
+    def forward(self, x):
+        B, C, H, W = x.shape
+        # FIXME look at relaxing size constraints
+        # assert H == self.img_size[0] and W == self.img_size[1],
+        #     f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
+        x = self.proj(x).flatten(2).transpose(1, 2)  # B Ph*Pw C
+        if self.norm is not None:
+            x = self.norm(x)
+        return x
+    def flops(self):
+        Ho, Wo = self.patches_resolution
+        flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])  # type: ignore
+        if self.norm is not None:
+            flops += Ho * Wo * self.embed_dim
+        return flops
+class RSTB(nn.Module):
+    """Residual Swin Transformer Block (RSTB).
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        depth (int): Number of blocks.
+        num_heads (int): Number of attention heads.
+        window_size (int): Local window size.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+        img_size: Input image size.
+        patch_size: Patch size.
+        resi_connection: The convolutional block before residual connection.
+    """
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        depth,
+        num_heads,
+        window_size,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        norm_layer=nn.LayerNorm,
+        downsample=None,
+        use_checkpoint=False,
+        img_size=224,
+        patch_size=4,
+        resi_connection="1conv",
+    ):
+        super(RSTB, self).__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.residual_group = BasicLayer(
+            dim=dim,
+            input_resolution=input_resolution,
+            depth=depth,
+            num_heads=num_heads,
+            window_size=window_size,
+            mlp_ratio=mlp_ratio,
+            qkv_bias=qkv_bias,
+            drop=drop,
+            attn_drop=attn_drop,
+            drop_path=drop_path,
+            norm_layer=norm_layer,
+            downsample=downsample,
+            use_checkpoint=use_checkpoint,
+        )
+        if resi_connection == "1conv":
+            self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
+        elif resi_connection == "3conv":
+            # to save parameters and memory
+            self.conv = nn.Sequential(
+                nn.Conv2d(dim, dim // 4, 3, 1, 1),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(dim // 4, dim // 4, 1, 1, 0),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(dim // 4, dim, 3, 1, 1),
+            )
+        self.patch_embed = PatchEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=dim,
+            embed_dim=dim,
+            norm_layer=None,
+        )
+        self.patch_unembed = PatchUnEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=dim,
+            embed_dim=dim,
+            norm_layer=None,
+        )
+    def forward(self, x, x_size):
+        return (
+            self.patch_embed(
+                self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))
+            )
+            + x
+        )
+    def flops(self):
+        flops = 0
+        flops += self.residual_group.flops()
+        H, W = self.input_resolution
+        flops += H * W * self.dim * self.dim * 9
+        flops += self.patch_embed.flops()
+        flops += self.patch_unembed.flops()
+        return flops
+class PatchUnEmbed(nn.Module):
+    r"""Image to Patch Unembedding
+    Args:
+        img_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Module, optional): Normalization layer. Default: None
+    """
+    def __init__(
+        self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None
+    ):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]  # type: ignore
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+    def forward(self, x, x_size):
+        B, HW, C = x.shape
+        x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1])  # B Ph*Pw C
+        return x
+    def flops(self):
+        flops = 0
+        return flops
+class Upsample(nn.Sequential):
+    """Upsample module.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat):
+        m = []
+        if (scale & (scale - 1)) == 0:  # scale = 2^n
+            for _ in range(int(math.log(scale, 2))):
+                m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
+                m.append(nn.PixelShuffle(2))
+        elif scale == 3:
+            m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
+            m.append(nn.PixelShuffle(3))
+        else:
+            raise ValueError(
+                f"scale {scale} is not supported. " "Supported scales: 2^n and 3."
+            )
+        super(Upsample, self).__init__(*m)
+class Upsample_hf(nn.Sequential):
+    """Upsample module.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat):
+        m = []
+        if (scale & (scale - 1)) == 0:  # scale = 2^n
+            for _ in range(int(math.log(scale, 2))):
+                m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
+                m.append(nn.PixelShuffle(2))
+        elif scale == 3:
+            m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
+            m.append(nn.PixelShuffle(3))
+        else:
+            raise ValueError(
+                f"scale {scale} is not supported. " "Supported scales: 2^n and 3."
+            )
+        super(Upsample_hf, self).__init__(*m)
+class UpsampleOneStep(nn.Sequential):
+    """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)
+       Used in lightweight SR to save parameters.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat, num_out_ch, input_resolution=None):
+        self.num_feat = num_feat
+        self.input_resolution = input_resolution
+        m = []
+        m.append(nn.Conv2d(num_feat, (scale**2) * num_out_ch, 3, 1, 1))
+        m.append(nn.PixelShuffle(scale))
+        super(UpsampleOneStep, self).__init__(*m)
+    def flops(self):
+        H, W = self.input_resolution  # type: ignore
+        flops = H * W * self.num_feat * 3 * 9
+        return flops
+class Swin2SR(nn.Module):
+    r"""Swin2SR
+        A PyTorch impl of : `Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration`.
+    Args:
+        img_size (int | tuple(int)): Input image size. Default 64
+        patch_size (int | tuple(int)): Patch size. Default: 1
+        in_chans (int): Number of input image channels. Default: 3
+        embed_dim (int): Patch embedding dimension. Default: 96
+        depths (tuple(int)): Depth of each Swin Transformer layer.
+        num_heads (tuple(int)): Number of attention heads in different layers.
+        window_size (int): Window size. Default: 7
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        drop_rate (float): Dropout rate. Default: 0
+        attn_drop_rate (float): Attention dropout rate. Default: 0
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1
+        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
+        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
+        patch_norm (bool): If True, add normalization after patch embedding. Default: True
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
+        upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction
+        img_range: Image range. 1. or 255.
+        upsampler: The reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None
+        resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
+    """
+    def __init__(
+        self,
+        state_dict,
+        **kwargs,
+    ):
+        super(Swin2SR, self).__init__()
+        # Defaults
+        img_size = 128
+        patch_size = 1
+        in_chans = 3
+        embed_dim = 96
+        depths = [6, 6, 6, 6]
+        num_heads = [6, 6, 6, 6]
+        window_size = 7
+        mlp_ratio = 4.0
+        qkv_bias = True
+        drop_rate = 0.0
+        attn_drop_rate = 0.0
+        drop_path_rate = 0.1
+        norm_layer = nn.LayerNorm
+        ape = False
+        patch_norm = True
+        use_checkpoint = False
+        upscale = 2
+        img_range = 1.0
+        upsampler = ""
+        resi_connection = "1conv"
+        num_in_ch = in_chans
+        num_out_ch = in_chans
+        num_feat = 64
+        self.model_arch = "Swin2SR"
+        self.sub_type = "SR"
+        self.state = state_dict
+        if "params_ema" in self.state:
+            self.state = self.state["params_ema"]
+        elif "params" in self.state:
+            self.state = self.state["params"]
+        state_keys = self.state.keys()
+        if "conv_before_upsample.0.weight" in state_keys:
+            if "conv_aux.weight" in state_keys:
+                upsampler = "pixelshuffle_aux"
+            elif "conv_up1.weight" in state_keys:
+                upsampler = "nearest+conv"
+            else:
+                upsampler = "pixelshuffle"
+                supports_fp16 = False
+        elif "upsample.0.weight" in state_keys:
+            upsampler = "pixelshuffledirect"
+        else:
+            upsampler = ""
+        num_feat = (
+            self.state.get("conv_before_upsample.0.weight", None).shape[1]
+            if self.state.get("conv_before_upsample.weight", None)
+            else 64
+        )
+        num_in_ch = self.state["conv_first.weight"].shape[1]
+        in_chans = num_in_ch
+        if "conv_last.weight" in state_keys:
+            num_out_ch = self.state["conv_last.weight"].shape[0]
+        else:
+            num_out_ch = num_in_ch
+        upscale = 1
+        if upsampler == "nearest+conv":
+            upsample_keys = [
+                x for x in state_keys if "conv_up" in x and "bias" not in x
+            ]
+            for upsample_key in upsample_keys:
+                upscale *= 2
+        elif upsampler == "pixelshuffle" or upsampler == "pixelshuffle_aux":
+            upsample_keys = [
+                x
+                for x in state_keys
+                if "upsample" in x and "conv" not in x and "bias" not in x
+            ]
+            for upsample_key in upsample_keys:
+                shape = self.state[upsample_key].shape[0]
+                upscale *= math.sqrt(shape // num_feat)
+            upscale = int(upscale)
+        elif upsampler == "pixelshuffledirect":
+            upscale = int(
+                math.sqrt(self.state["upsample.0.bias"].shape[0] // num_out_ch)
+            )
+        max_layer_num = 0
+        max_block_num = 0
+        for key in state_keys:
+            result = re.match(
+                r"layers.(\d*).residual_group.blocks.(\d*).norm1.weight", key
+            )
+            if result:
+                layer_num, block_num = result.groups()
+                max_layer_num = max(max_layer_num, int(layer_num))
+                max_block_num = max(max_block_num, int(block_num))
+        depths = [max_block_num + 1 for _ in range(max_layer_num + 1)]
+        if (
+            "layers.0.residual_group.blocks.0.attn.relative_position_bias_table"
+            in state_keys
+        ):
+            num_heads_num = self.state[
+                "layers.0.residual_group.blocks.0.attn.relative_position_bias_table"
+            ].shape[-1]
+            num_heads = [num_heads_num for _ in range(max_layer_num + 1)]
+        else:
+            num_heads = depths
+        embed_dim = self.state["conv_first.weight"].shape[0]
+        mlp_ratio = float(
+            self.state["layers.0.residual_group.blocks.0.mlp.fc1.bias"].shape[0]
+            / embed_dim
+        )
+        # TODO: could actually count the layers, but this should do
+        if "layers.0.conv.4.weight" in state_keys:
+            resi_connection = "3conv"
+        else:
+            resi_connection = "1conv"
+        window_size = int(
+            math.sqrt(
+                self.state[
+                    "layers.0.residual_group.blocks.0.attn.relative_position_index"
+                ].shape[0]
+            )
+        )
+        if "layers.0.residual_group.blocks.1.attn_mask" in state_keys:
+            img_size = int(
+                math.sqrt(
+                    self.state["layers.0.residual_group.blocks.1.attn_mask"].shape[0]
+                )
+                * window_size
+            )
+        # The JPEG models are the only ones with window-size 7, and they also use this range
+        img_range = 255.0 if window_size == 7 else 1.0
+        self.in_nc = num_in_ch
+        self.out_nc = num_out_ch
+        self.num_feat = num_feat
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.depths = depths
+        self.window_size = window_size
+        self.mlp_ratio = mlp_ratio
+        self.scale = upscale
+        self.upsampler = upsampler
+        self.img_size = img_size
+        self.img_range = img_range
+        self.resi_connection = resi_connection
+        self.supports_fp16 = False  # Too much weirdness to support this at the moment
+        self.supports_bfp16 = True
+        self.min_size_restriction = 16
+        ## END AUTO DETECTION
+        if in_chans == 3:
+            rgb_mean = (0.4488, 0.4371, 0.4040)
+            self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
+        else:
+            self.mean = torch.zeros(1, 1, 1, 1)
+        self.upscale = upscale
+        self.upsampler = upsampler
+        self.window_size = window_size
+        #####################################################################################################
+        ################################### 1, shallow feature extraction ###################################
+        self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
+        #####################################################################################################
+        ################################### 2, deep feature extraction ######################################
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.ape = ape
+        self.patch_norm = patch_norm
+        self.num_features = embed_dim
+        self.mlp_ratio = mlp_ratio
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=embed_dim,
+            embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None,
+        )
+        num_patches = self.patch_embed.num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+        # merge non-overlapping patches into image
+        self.patch_unembed = PatchUnEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=embed_dim,
+            embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None,
+        )
+        # absolute position embedding
+        if self.ape:
+            self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))  # type: ignore
+            trunc_normal_(self.absolute_pos_embed, std=0.02)
+        self.pos_drop = nn.Dropout(p=drop_rate)
+        # stochastic depth
+        dpr = [
+            x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
+        ]  # stochastic depth decay rule
+        # build Residual Swin Transformer blocks (RSTB)
+        self.layers = nn.ModuleList()
+        for i_layer in range(self.num_layers):
+            layer = RSTB(
+                dim=embed_dim,
+                input_resolution=(patches_resolution[0], patches_resolution[1]),
+                depth=depths[i_layer],
+                num_heads=num_heads[i_layer],
+                window_size=window_size,
+                mlp_ratio=self.mlp_ratio,
+                qkv_bias=qkv_bias,
+                drop=drop_rate,
+                attn_drop=attn_drop_rate,
+                drop_path=dpr[sum(depths[:i_layer]) : sum(depths[: i_layer + 1])],  # type: ignore    # no impact on SR results
+                norm_layer=norm_layer,
+                downsample=None,
+                use_checkpoint=use_checkpoint,
+                img_size=img_size,
+                patch_size=patch_size,
+                resi_connection=resi_connection,
+            )
+            self.layers.append(layer)
+        if self.upsampler == "pixelshuffle_hf":
+            self.layers_hf = nn.ModuleList()
+            for i_layer in range(self.num_layers):
+                layer = RSTB(
+                    dim=embed_dim,
+                    input_resolution=(patches_resolution[0], patches_resolution[1]),
+                    depth=depths[i_layer],
+                    num_heads=num_heads[i_layer],
+                    window_size=window_size,
+                    mlp_ratio=self.mlp_ratio,
+                    qkv_bias=qkv_bias,
+                    drop=drop_rate,
+                    attn_drop=attn_drop_rate,
+                    drop_path=dpr[sum(depths[:i_layer]) : sum(depths[: i_layer + 1])],  # type: ignore    # no impact on SR results # type: ignore
+                    norm_layer=norm_layer,
+                    downsample=None,
+                    use_checkpoint=use_checkpoint,
+                    img_size=img_size,
+                    patch_size=patch_size,
+                    resi_connection=resi_connection,
+                )
+                self.layers_hf.append(layer)
+        self.norm = norm_layer(self.num_features)
+        # build the last conv layer in deep feature extraction
+        if resi_connection == "1conv":
+            self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
+        elif resi_connection == "3conv":
+            # to save parameters and memory
+            self.conv_after_body = nn.Sequential(
+                nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1),
+            )
+        #####################################################################################################
+        ################################ 3, high quality image reconstruction ################################
+        if self.upsampler == "pixelshuffle":
+            # for classical SR
+            self.conv_before_upsample = nn.Sequential(
+                nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.upsample = Upsample(upscale, num_feat)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+        elif self.upsampler == "pixelshuffle_aux":
+            self.conv_bicubic = nn.Conv2d(num_in_ch, num_feat, 3, 1, 1)
+            self.conv_before_upsample = nn.Sequential(
+                nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.conv_aux = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+            self.conv_after_aux = nn.Sequential(
+                nn.Conv2d(3, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.upsample = Upsample(upscale, num_feat)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+        elif self.upsampler == "pixelshuffle_hf":
+            self.conv_before_upsample = nn.Sequential(
+                nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.upsample = Upsample(upscale, num_feat)
+            self.upsample_hf = Upsample_hf(upscale, num_feat)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+            self.conv_first_hf = nn.Sequential(
+                nn.Conv2d(num_feat, embed_dim, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.conv_after_body_hf = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
+            self.conv_before_upsample_hf = nn.Sequential(
+                nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.conv_last_hf = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+        elif self.upsampler == "pixelshuffledirect":
+            # for lightweight SR (to save parameters)
+            self.upsample = UpsampleOneStep(
+                upscale,
+                embed_dim,
+                num_out_ch,
+                (patches_resolution[0], patches_resolution[1]),
+            )
+        elif self.upsampler == "nearest+conv":
+            # for real-world SR (less artifacts)
+            assert self.upscale == 4, "only support x4 now."
+            self.conv_before_upsample = nn.Sequential(
+                nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+            self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+        else:
+            # for image denoising and JPEG compression artifact reduction
+            self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1)
+        self.apply(self._init_weights)
+        self.load_state_dict(state_dict)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=0.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+    @torch.jit.ignore  # type: ignore
+    def no_weight_decay(self):
+        return {"absolute_pos_embed"}
+    @torch.jit.ignore  # type: ignore
+    def no_weight_decay_keywords(self):
+        return {"relative_position_bias_table"}
+    def check_image_size(self, x):
+        _, _, h, w = x.size()
+        mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
+        mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
+        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect")
+        return x
+    def forward_features(self, x):
+        x_size = (x.shape[2], x.shape[3])
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x, x_size)
+        x = self.norm(x)  # B L C
+        x = self.patch_unembed(x, x_size)
+        return x
+    def forward_features_hf(self, x):
+        x_size = (x.shape[2], x.shape[3])
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+        for layer in self.layers_hf:
+            x = layer(x, x_size)
+        x = self.norm(x)  # B L C
+        x = self.patch_unembed(x, x_size)
+        return x
+    def forward(self, x):
+        """Run the full network and return output cropped to the upscaled input size.
+        The input is passed through ``self.check_image_size``, normalised with
+        ``self.mean`` / ``self.img_range``, sent through the branch selected by
+        ``self.upsampler`` and de-normalised again.
+        Args:
+            x: 4-D tensor; dims 2 and 3 are read as height ``H`` and width ``W``.
+        Returns:
+            A tensor cropped to ``(H * self.upscale, W * self.upscale)`` in its
+            last two dims. When ``self.upsampler == "pixelshuffle_hf"`` a 3-tuple
+            ``(x_out, x, x_hf)`` of tensors cropped the same way is returned.
+        """
+        # Remember the original size: the final crop is relative to it, not to
+        # the size returned by check_image_size.
+        H, W = x.shape[2:]
+        x = self.check_image_size(x)
+        # NOTE: rebinds the module attribute so the mean matches x's tensor type.
+        self.mean = self.mean.type_as(x)
+        x = (x - self.mean) * self.img_range
+        if self.upsampler == "pixelshuffle":
+            # for classical SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.conv_before_upsample(x)
+            x = self.conv_last(self.upsample(x))
+        elif self.upsampler == "pixelshuffle_aux":
+            # Bicubic upscale of the normalised input, later added to the
+            # learned upsampling result.
+            bicubic = F.interpolate(
+                x,
+                size=(H * self.upscale, W * self.upscale),
+                mode="bicubic",
+                align_corners=False,
+            )
+            bicubic = self.conv_bicubic(bicubic)
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.conv_before_upsample(x)
+            aux = self.conv_aux(x)  # b, 3, LR_H, LR_W
+            x = self.conv_after_aux(aux)
+            # Crop both terms to the target size before summing.
+            x = (
+                self.upsample(x)[:, :, : H * self.upscale, : W * self.upscale]
+                + bicubic[:, :, : H * self.upscale, : W * self.upscale]
+            )
+            x = self.conv_last(x)
+            # De-normalised auxiliary output; computed but not returned (see NOTE below).
+            aux = aux / self.img_range + self.mean
+        elif self.upsampler == "pixelshuffle_hf":
+            # for classical SR with HF
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x_before = self.conv_before_upsample(x)
+            x_out = self.conv_last(self.upsample(x_before))
+            # Second branch: starts from x_before and uses the *_hf modules.
+            x_hf = self.conv_first_hf(x_before)
+            x_hf = self.conv_after_body_hf(self.forward_features_hf(x_hf)) + x_hf
+            x_hf = self.conv_before_upsample_hf(x_hf)
+            x_hf = self.conv_last_hf(self.upsample_hf(x_hf))
+            # Sum is taken on the still-normalised tensors; x_hf is de-normalised
+            # afterwards, x_out just before returning.
+            x = x_out + x_hf
+            x_hf = x_hf / self.img_range + self.mean
+        elif self.upsampler == "pixelshuffledirect":
+            # for lightweight SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.upsample(x)
+        elif self.upsampler == "nearest+conv":
+            # for real-world SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.conv_before_upsample(x)
+            # Two nearest-neighbour x2 steps, each followed by a conv + lrelu.
+            x = self.lrelu(
+                self.conv_up1(
+                    torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest")
+                )
+            )
+            x = self.lrelu(
+                self.conv_up2(
+                    torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest")
+                )
+            )
+            x = self.conv_last(self.lrelu(self.conv_hr(x)))
+        else:
+            # for image denoising and JPEG compression artifact reduction
+            x_first = self.conv_first(x)
+            res = self.conv_after_body(self.forward_features(x_first)) + x_first
+            # Residual learning: the network output is added to the input.
+            x = x + self.conv_last(res)
+        # Undo the input normalisation.
+        x = x / self.img_range + self.mean
+        if self.upsampler == "pixelshuffle_aux":
+            # NOTE: I removed an "aux" output here. not sure what that was for
+            return x[:, :, : H * self.upscale, : W * self.upscale]  # type: ignore
+        elif self.upsampler == "pixelshuffle_hf":
+            x_out = x_out / self.img_range + self.mean  # type: ignore
+            return x_out[:, :, : H * self.upscale, : W * self.upscale], x[:, :, : H * self.upscale, : W * self.upscale], x_hf[:, :, : H * self.upscale, : W * self.upscale]  # type: ignore
+        else:
+            return x[:, :, : H * self.upscale, : W * self.upscale]
+    def flops(self):
+        flops = 0
+        H, W = self.patches_resolution
+        flops += H * W * 3 * self.embed_dim * 9
+        flops += self.patch_embed.flops()
+        for i, layer in enumerate(self.layers):
+            flops += layer.flops()  # type: ignore
+        flops += H * W * 3 * self.embed_dim * self.embed_dim
+        flops += self.upsample.flops()  # type: ignore
+        return flops

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/SwinIR.py ADDED Viewed

	@@ -0,0 +1,1224 @@

+# pylint: skip-file
+# -----------------------------------------------------------------------------------
+# SwinIR: Image Restoration Using Swin Transformer, https://arxiv.org/abs/2108.10257
+# Originally Written by Ze Liu, Modified by Jingyun Liang.
+# -----------------------------------------------------------------------------------
+import math
+import re
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+# Originally from the timm package
+from .timm.drop import DropPath
+from .timm.helpers import to_2tuple
+from .timm.weight_init import trunc_normal_
+class Mlp(nn.Module):
+    def __init__(
+        self,
+        in_features,
+        hidden_features=None,
+        out_features=None,
+        act_layer=nn.GELU,
+        drop=0.0,
+    ):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+def window_partition(x, window_size):
+    """
+    Args:
+        x: (B, H, W, C)
+        window_size (int): window size
+    Returns:
+        windows: (num_windows*B, window_size, window_size, C)
+    """
+    B, H, W, C = x.shape
+    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
+    windows = (
+        x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
+    )
+    return windows
+def window_reverse(windows, window_size, H, W):
+    """
+    Args:
+        windows: (num_windows*B, window_size, window_size, C)
+        window_size (int): Window size
+        H (int): Height of image
+        W (int): Width of image
+    Returns:
+        x: (B, H, W, C)
+    """
+    B = int(windows.shape[0] / (H * W / window_size / window_size))
+    x = windows.view(
+        B, H // window_size, W // window_size, window_size, window_size, -1
+    )
+    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
+    return x
+class WindowAttention(nn.Module):
+    r"""Window based multi-head self attention (W-MSA) module with relative position bias.
+    It supports both of shifted and non-shifted window.
+    Args:
+        dim (int): Number of input channels.
+        window_size (tuple[int]): The height and width of the window.
+        num_heads (int): Number of attention heads.
+        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
+        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
+        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
+    """
+    def __init__(
+        self,
+        dim,
+        window_size,
+        num_heads,
+        qkv_bias=True,
+        qk_scale=None,
+        attn_drop=0.0,
+        proj_drop=0.0,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.window_size = window_size  # Wh, Ww
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        # NOTE: `or` means a falsy qk_scale (None or 0) selects the default scale.
+        self.scale = qk_scale or head_dim**-0.5
+        # define a parameter table of relative position bias
+        self.relative_position_bias_table = nn.Parameter(  # type: ignore
+            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)
+        )  # 2*Wh-1 * 2*Ww-1, nH
+        # get pair-wise relative position index for each token inside the window
+        coords_h = torch.arange(self.window_size[0])
+        coords_w = torch.arange(self.window_size[1])
+        coords = torch.stack(torch.meshgrid([coords_h, coords_w]))  # 2, Wh, Ww
+        coords_flatten = torch.flatten(coords, 1)  # 2, Wh*Ww
+        relative_coords = (
+            coords_flatten[:, :, None] - coords_flatten[:, None, :]
+        )  # 2, Wh*Ww, Wh*Ww
+        relative_coords = relative_coords.permute(
+            1, 2, 0
+        ).contiguous()  # Wh*Ww, Wh*Ww, 2
+        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0
+        relative_coords[:, :, 1] += self.window_size[1] - 1
+        # Scale the row offset so that (row, col) pairs map to unique flat indices
+        # once the two components are summed.
+        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
+        relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
+        # Stored as a buffer: an index tensor, not a learnable parameter.
+        self.register_buffer("relative_position_index", relative_position_index)
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+        trunc_normal_(self.relative_position_bias_table, std=0.02)
+        self.softmax = nn.Softmax(dim=-1)
+    def forward(self, x, mask=None):
+        """
+        Args:
+            x: input features with shape of (num_windows*B, N, C)
+            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
+        Returns:
+            Tensor with the same (num_windows*B, N, C) shape as ``x``.
+        """
+        B_, N, C = x.shape
+        # One projection yields q, k and v; result is laid out as
+        # (3, B_, num_heads, N, C // num_heads).
+        qkv = (
+            self.qkv(x)
+            .reshape(B_, N, 3, self.num_heads, C // self.num_heads)
+            .permute(2, 0, 3, 1, 4)
+        )
+        q, k, v = (
+            qkv[0],
+            qkv[1],
+            qkv[2],
+        )  # make torchscript happy (cannot use tensor as tuple)
+        q = q * self.scale
+        attn = q @ k.transpose(-2, -1)
+        # Look up the learned bias for every (query, key) position pair.
+        relative_position_bias = self.relative_position_bias_table[
+            self.relative_position_index.view(-1)  # type: ignore
+        ].view(
+            self.window_size[0] * self.window_size[1],
+            self.window_size[0] * self.window_size[1],
+            -1,
+        )  # Wh*Ww,Wh*Ww,nH
+        relative_position_bias = relative_position_bias.permute(
+            2, 0, 1
+        ).contiguous()  # nH, Wh*Ww, Wh*Ww
+        attn = attn + relative_position_bias.unsqueeze(0)
+        if mask is not None:
+            nW = mask.shape[0]
+            # Expose the window axis so the per-window mask broadcasts over
+            # batch and heads, then fold it back.
+            attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(
+                1
+            ).unsqueeze(0)
+            attn = attn.view(-1, self.num_heads, N, N)
+            attn = self.softmax(attn)
+        else:
+            attn = self.softmax(attn)
+        attn = self.attn_drop(attn)
+        # Merge the heads back into the channel dimension.
+        x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+    def extra_repr(self) -> str:
+        return f"dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}"
+    def flops(self, N):
+        """Return the operation count for one window holding ``N`` tokens."""
+        # calculate flops for 1 window with token length of N
+        flops = 0
+        # qkv = self.qkv(x)
+        flops += N * self.dim * 3 * self.dim
+        # attn = (q @ k.transpose(-2, -1))
+        flops += self.num_heads * N * (self.dim // self.num_heads) * N
+        #  x = (attn @ v)
+        flops += self.num_heads * N * N * (self.dim // self.num_heads)
+        # x = self.proj(x)
+        flops += N * self.dim * self.dim
+        return flops
+class SwinTransformerBlock(nn.Module):
+    r"""Swin Transformer Block.
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        num_heads (int): Number of attention heads.
+        window_size (int): Window size.
+        shift_size (int): Shift size for SW-MSA.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float, optional): Stochastic depth rate. Default: 0.0
+        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        num_heads,
+        window_size=7,
+        shift_size=0,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        act_layer=nn.GELU,
+        norm_layer=nn.LayerNorm,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.num_heads = num_heads
+        self.window_size = window_size
+        self.shift_size = shift_size
+        self.mlp_ratio = mlp_ratio
+        if min(self.input_resolution) <= self.window_size:
+            # if window size is larger than input resolution, we don't partition windows
+            self.shift_size = 0
+            self.window_size = min(self.input_resolution)
+        assert (
+            0 <= self.shift_size < self.window_size
+        ), "shift_size must in 0-window_size"
+        self.norm1 = norm_layer(dim)
+        self.attn = WindowAttention(
+            dim,
+            window_size=to_2tuple(self.window_size),
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            qk_scale=qk_scale,
+            attn_drop=attn_drop,
+            proj_drop=drop,
+        )
+        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(
+            in_features=dim,
+            hidden_features=mlp_hidden_dim,
+            act_layer=act_layer,
+            drop=drop,
+        )
+        # Precompute the mask for the nominal resolution; only shifted blocks need one.
+        if self.shift_size > 0:
+            attn_mask = self.calculate_mask(self.input_resolution)
+        else:
+            attn_mask = None
+        self.register_buffer("attn_mask", attn_mask)
+    def calculate_mask(self, x_size):
+        """Build the attention mask for a feature map of size ``x_size``.
+        Args:
+            x_size (tuple[int]): (H, W) of the feature map.
+        Returns:
+            Tensor of shape (nW, window_size*window_size, window_size*window_size)
+            holding 0 where two positions share a region id and -100 otherwise.
+        """
+        # calculate attention mask for SW-MSA
+        H, W = x_size
+        img_mask = torch.zeros((1, H, W, 1))  # 1 H W 1
+        h_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        w_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        # Label each of the 3x3 slice combinations with its own region id.
+        cnt = 0
+        for h in h_slices:
+            for w in w_slices:
+                img_mask[:, h, w, :] = cnt
+                cnt += 1
+        mask_windows = window_partition(
+            img_mask, self.window_size
+        )  # nW, window_size, window_size, 1
+        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
+        # Pairwise id difference: non-zero means the two positions lie in
+        # different regions and must not attend to each other.
+        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
+        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(
+            attn_mask == 0, float(0.0)
+        )
+        return attn_mask
+    def forward(self, x, x_size):
+        """Apply windowed attention and the MLP, each with a residual connection.
+        Args:
+            x: tokens of shape (B, L, C); viewed as (B, H, W, C) below.
+            x_size (tuple[int]): (H, W) of the feature map the tokens come from.
+        Returns:
+            Tensor of shape (B, H * W, C).
+        """
+        H, W = x_size
+        B, L, C = x.shape
+        # assert L == H * W, "input feature has wrong size"
+        shortcut = x
+        x = self.norm1(x)
+        x = x.view(B, H, W, C)
+        # cyclic shift
+        if self.shift_size > 0:
+            shifted_x = torch.roll(
+                x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)
+            )
+        else:
+            shifted_x = x
+        # partition windows
+        x_windows = window_partition(
+            shifted_x, self.window_size
+        )  # nW*B, window_size, window_size, C
+        x_windows = x_windows.view(
+            -1, self.window_size * self.window_size, C
+        )  # nW*B, window_size*window_size, C
+        # W-MSA/SW-MSA (to be compatible with testing on images whose shapes are a
+        # multiple of the window size): reuse the precomputed mask when the size
+        # matches, otherwise rebuild it for the actual size on x's device.
+        if self.input_resolution == x_size:
+            attn_windows = self.attn(
+                x_windows, mask=self.attn_mask
+            )  # nW*B, window_size*window_size, C
+        else:
+            attn_windows = self.attn(
+                x_windows, mask=self.calculate_mask(x_size).to(x.device)
+            )
+        # merge windows
+        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
+        shifted_x = window_reverse(attn_windows, self.window_size, H, W)  # B H' W' C
+        # reverse cyclic shift
+        if self.shift_size > 0:
+            x = torch.roll(
+                shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)
+            )
+        else:
+            x = shifted_x
+        x = x.view(B, H * W, C)
+        # FFN
+        x = shortcut + self.drop_path(x)
+        x = x + self.drop_path(self.mlp(self.norm2(x)))
+        return x
+    def extra_repr(self) -> str:
+        return (
+            f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, "
+            f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}"
+        )
+    def flops(self):
+        """Estimate the operation count of this block at ``self.input_resolution``."""
+        flops = 0
+        H, W = self.input_resolution
+        # norm1
+        flops += self.dim * H * W
+        # W-MSA/SW-MSA
+        # NOTE: true division, so nW (and the returned total) is a float.
+        nW = H * W / self.window_size / self.window_size
+        flops += nW * self.attn.flops(self.window_size * self.window_size)
+        # mlp
+        flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio
+        # norm2
+        flops += self.dim * H * W
+        return flops
+class PatchMerging(nn.Module):
+    r"""Patch Merging Layer.
+    Args:
+        input_resolution (tuple[int]): Resolution of input feature.
+        dim (int): Number of input channels.
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+    def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.input_resolution = input_resolution
+        self.dim = dim
+        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
+        self.norm = norm_layer(4 * dim)
+    def forward(self, x):
+        """
+        x: B, H*W, C
+        """
+        H, W = self.input_resolution
+        B, L, C = x.shape
+        assert L == H * W, "input feature has wrong size"
+        assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even."
+        x = x.view(B, H, W, C)
+        x0 = x[:, 0::2, 0::2, :]  # B H/2 W/2 C
+        x1 = x[:, 1::2, 0::2, :]  # B H/2 W/2 C
+        x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
+        x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
+        x = torch.cat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
+        x = x.view(B, -1, 4 * C)  # B H/2*W/2 4*C
+        x = self.norm(x)
+        x = self.reduction(x)
+        return x
+    def extra_repr(self) -> str:
+        return f"input_resolution={self.input_resolution}, dim={self.dim}"
+    def flops(self):
+        H, W = self.input_resolution
+        flops = H * W * self.dim
+        flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
+        return flops
+class BasicLayer(nn.Module):
+    """A basic Swin Transformer layer for one stage.
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        depth (int): Number of blocks.
+        num_heads (int): Number of attention heads.
+        window_size (int): Local window size.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+    """
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        depth,
+        num_heads,
+        window_size,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        norm_layer=nn.LayerNorm,
+        downsample=None,
+        use_checkpoint=False,
+    ):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.depth = depth
+        self.use_checkpoint = use_checkpoint
+        # build blocks
+        self.blocks = nn.ModuleList(
+            [
+                SwinTransformerBlock(
+                    dim=dim,
+                    input_resolution=input_resolution,
+                    num_heads=num_heads,
+                    window_size=window_size,
+                    shift_size=0 if (i % 2 == 0) else window_size // 2,
+                    mlp_ratio=mlp_ratio,
+                    qkv_bias=qkv_bias,
+                    qk_scale=qk_scale,
+                    drop=drop,
+                    attn_drop=attn_drop,
+                    drop_path=drop_path[i]
+                    if isinstance(drop_path, list)
+                    else drop_path,
+                    norm_layer=norm_layer,
+                )
+                for i in range(depth)
+            ]
+        )
+        # patch merging layer
+        if downsample is not None:
+            self.downsample = downsample(
+                input_resolution, dim=dim, norm_layer=norm_layer
+            )
+        else:
+            self.downsample = None
+    def forward(self, x, x_size):
+        for blk in self.blocks:
+            if self.use_checkpoint:
+                x = checkpoint.checkpoint(blk, x, x_size)
+            else:
+                x = blk(x, x_size)
+        if self.downsample is not None:
+            x = self.downsample(x)
+        return x
+    def extra_repr(self) -> str:
+        return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}"
+    def flops(self):
+        flops = 0
+        for blk in self.blocks:
+            flops += blk.flops()  # type: ignore
+        if self.downsample is not None:
+            flops += self.downsample.flops()
+        return flops
+class RSTB(nn.Module):
+    """Residual Swin Transformer Block (RSTB).
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        depth (int): Number of blocks.
+        num_heads (int): Number of attention heads.
+        window_size (int): Local window size.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+        img_size: Input image size.
+        patch_size: Patch size.
+        resi_connection: The convolutional block before residual connection.
+    """
+    def __init__(
+        self,
+        dim,
+        input_resolution,
+        depth,
+        num_heads,
+        window_size,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        norm_layer=nn.LayerNorm,
+        downsample=None,
+        use_checkpoint=False,
+        img_size=224,
+        patch_size=4,
+        resi_connection="1conv",
+    ):
+        super(RSTB, self).__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.residual_group = BasicLayer(
+            dim=dim,
+            input_resolution=input_resolution,
+            depth=depth,
+            num_heads=num_heads,
+            window_size=window_size,
+            mlp_ratio=mlp_ratio,
+            qkv_bias=qkv_bias,
+            qk_scale=qk_scale,
+            drop=drop,
+            attn_drop=attn_drop,
+            drop_path=drop_path,
+            norm_layer=norm_layer,
+            downsample=downsample,
+            use_checkpoint=use_checkpoint,
+        )
+        if resi_connection == "1conv":
+            self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
+        elif resi_connection == "3conv":
+            # to save parameters and memory
+            self.conv = nn.Sequential(
+                nn.Conv2d(dim, dim // 4, 3, 1, 1),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(dim // 4, dim // 4, 1, 1, 0),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(dim // 4, dim, 3, 1, 1),
+            )
+        self.patch_embed = PatchEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=0,
+            embed_dim=dim,
+            norm_layer=None,
+        )
+        self.patch_unembed = PatchUnEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=0,
+            embed_dim=dim,
+            norm_layer=None,
+        )
+    def forward(self, x, x_size):
+        return (
+            self.patch_embed(
+                self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))
+            )
+            + x
+        )
+    def flops(self):
+        flops = 0
+        flops += self.residual_group.flops()
+        H, W = self.input_resolution
+        flops += H * W * self.dim * self.dim * 9
+        flops += self.patch_embed.flops()
+        flops += self.patch_unembed.flops()
+        return flops
+class PatchEmbed(nn.Module):
+    r"""Image to Patch Embedding
+    Args:
+        img_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Module, optional): Normalization layer. Default: None
+    """
+    def __init__(
+        self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None
+    ):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [
+            img_size[0] // patch_size[0],  # type: ignore
+            img_size[1] // patch_size[1],  # type: ignore
+        ]
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+        if norm_layer is not None:
+            self.norm = norm_layer(embed_dim)
+        else:
+            self.norm = None
+    def forward(self, x):
+        x = x.flatten(2).transpose(1, 2)  # B Ph*Pw C
+        if self.norm is not None:
+            x = self.norm(x)
+        return x
+    def flops(self):
+        flops = 0
+        H, W = self.img_size
+        if self.norm is not None:
+            flops += H * W * self.embed_dim  # type: ignore
+        return flops
+class PatchUnEmbed(nn.Module):
+    r"""Image to Patch Unembedding
+    Args:
+        img_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Module, optional): Normalization layer. Default: None
+    """
+    def __init__(
+        self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None
+    ):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [
+            img_size[0] // patch_size[0],  # type: ignore
+            img_size[1] // patch_size[1],  # type: ignore
+        ]
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+    def forward(self, x, x_size):
+        B, HW, C = x.shape
+        x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1])  # B Ph*Pw C
+        return x
+    def flops(self):
+        flops = 0
+        return flops
+class Upsample(nn.Sequential):
+    """Upsample module.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat):
+        m = []
+        if (scale & (scale - 1)) == 0:  # scale = 2^n
+            for _ in range(int(math.log(scale, 2))):
+                m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
+                m.append(nn.PixelShuffle(2))
+        elif scale == 3:
+            m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
+            m.append(nn.PixelShuffle(3))
+        else:
+            raise ValueError(
+                f"scale {scale} is not supported. " "Supported scales: 2^n and 3."
+            )
+        super(Upsample, self).__init__(*m)
+class UpsampleOneStep(nn.Sequential):
+    """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)
+       Used in lightweight SR to save parameters.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat, num_out_ch, input_resolution=None):
+        self.num_feat = num_feat
+        self.input_resolution = input_resolution
+        m = []
+        m.append(nn.Conv2d(num_feat, (scale**2) * num_out_ch, 3, 1, 1))
+        m.append(nn.PixelShuffle(scale))
+        super(UpsampleOneStep, self).__init__(*m)
+    def flops(self):
+        H, W = self.input_resolution  # type: ignore
+        flops = H * W * self.num_feat * 3 * 9
+        return flops
+class SwinIR(nn.Module):
+    r"""SwinIR
+        A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer.
+    Args:
+        img_size (int | tuple(int)): Input image size. Default 64
+        patch_size (int | tuple(int)): Patch size. Default: 1
+        in_chans (int): Number of input image channels. Default: 3
+        embed_dim (int): Patch embedding dimension. Default: 96
+        depths (tuple(int)): Depth of each Swin Transformer layer.
+        num_heads (tuple(int)): Number of attention heads in different layers.
+        window_size (int): Window size. Default: 7
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
+        drop_rate (float): Dropout rate. Default: 0
+        attn_drop_rate (float): Attention dropout rate. Default: 0
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1
+        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
+        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
+        patch_norm (bool): If True, add normalization after patch embedding. Default: True
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
+        upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction
+        img_range: Image range. 1. or 255.
+        upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None
+        resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
+    """
+    def __init__(
+        self,
+        state_dict,
+        **kwargs,
+    ):
+        super(SwinIR, self).__init__()
+        # Defaults
+        img_size = 64
+        patch_size = 1
+        in_chans = 3
+        embed_dim = 96
+        depths = [6, 6, 6, 6]
+        num_heads = [6, 6, 6, 6]
+        window_size = 7
+        mlp_ratio = 4.0
+        qkv_bias = True
+        qk_scale = None
+        drop_rate = 0.0
+        attn_drop_rate = 0.0
+        drop_path_rate = 0.1
+        norm_layer = nn.LayerNorm
+        ape = False
+        patch_norm = True
+        use_checkpoint = False
+        upscale = 2
+        img_range = 1.0
+        upsampler = ""
+        resi_connection = "1conv"
+        num_feat = 64
+        num_in_ch = in_chans
+        num_out_ch = in_chans
+        supports_fp16 = True
+        self.start_unshuffle = 1
+        self.model_arch = "SwinIR"
+        self.sub_type = "SR"
+        self.state = state_dict
+        if "params_ema" in self.state:
+            self.state = self.state["params_ema"]
+        elif "params" in self.state:
+            self.state = self.state["params"]
+        state_keys = self.state.keys()
+        if "conv_before_upsample.0.weight" in state_keys:
+            if "conv_up1.weight" in state_keys:
+                upsampler = "nearest+conv"
+            else:
+                upsampler = "pixelshuffle"
+                supports_fp16 = False
+        elif "upsample.0.weight" in state_keys:
+            upsampler = "pixelshuffledirect"
+        else:
+            upsampler = ""
+        num_feat = (
+            self.state.get("conv_before_upsample.0.weight", None).shape[1]
+            if self.state.get("conv_before_upsample.weight", None)
+            else 64
+        )
+        if "conv_first.1.weight" in self.state:
+            self.state["conv_first.weight"] = self.state.pop("conv_first.1.weight")
+            self.state["conv_first.bias"] = self.state.pop("conv_first.1.bias")
+            self.start_unshuffle = round(math.sqrt(self.state["conv_first.weight"].shape[1] // 3))
+        num_in_ch = self.state["conv_first.weight"].shape[1]
+        in_chans = num_in_ch
+        if "conv_last.weight" in state_keys:
+            num_out_ch = self.state["conv_last.weight"].shape[0]
+        else:
+            num_out_ch = num_in_ch
+        upscale = 1
+        if upsampler == "nearest+conv":
+            upsample_keys = [
+                x for x in state_keys if "conv_up" in x and "bias" not in x
+            ]
+            for upsample_key in upsample_keys:
+                upscale *= 2
+        elif upsampler == "pixelshuffle":
+            upsample_keys = [
+                x
+                for x in state_keys
+                if "upsample" in x and "conv" not in x and "bias" not in x
+            ]
+            for upsample_key in upsample_keys:
+                shape = self.state[upsample_key].shape[0]
+                upscale *= math.sqrt(shape // num_feat)
+            upscale = int(upscale)
+        elif upsampler == "pixelshuffledirect":
+            upscale = int(
+                math.sqrt(self.state["upsample.0.bias"].shape[0] // num_out_ch)
+            )
+        max_layer_num = 0
+        max_block_num = 0
+        for key in state_keys:
+            result = re.match(
+                r"layers.(\d*).residual_group.blocks.(\d*).norm1.weight", key
+            )
+            if result:
+                layer_num, block_num = result.groups()
+                max_layer_num = max(max_layer_num, int(layer_num))
+                max_block_num = max(max_block_num, int(block_num))
+        depths = [max_block_num + 1 for _ in range(max_layer_num + 1)]
+        if (
+            "layers.0.residual_group.blocks.0.attn.relative_position_bias_table"
+            in state_keys
+        ):
+            num_heads_num = self.state[
+                "layers.0.residual_group.blocks.0.attn.relative_position_bias_table"
+            ].shape[-1]
+            num_heads = [num_heads_num for _ in range(max_layer_num + 1)]
+        else:
+            num_heads = depths
+        embed_dim = self.state["conv_first.weight"].shape[0]
+        mlp_ratio = float(
+            self.state["layers.0.residual_group.blocks.0.mlp.fc1.bias"].shape[0]
+            / embed_dim
+        )
+        # TODO: could actually count the layers, but this should do
+        if "layers.0.conv.4.weight" in state_keys:
+            resi_connection = "3conv"
+        else:
+            resi_connection = "1conv"
+        window_size = int(
+            math.sqrt(
+                self.state[
+                    "layers.0.residual_group.blocks.0.attn.relative_position_index"
+                ].shape[0]
+            )
+        )
+        if "layers.0.residual_group.blocks.1.attn_mask" in state_keys:
+            img_size = int(
+                math.sqrt(
+                    self.state["layers.0.residual_group.blocks.1.attn_mask"].shape[0]
+                )
+                * window_size
+            )
+        # The JPEG models are the only ones with window-size 7, and they also use this range
+        img_range = 255.0 if window_size == 7 else 1.0
+        self.in_nc = num_in_ch
+        self.out_nc = num_out_ch
+        self.num_feat = num_feat
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.depths = depths
+        self.window_size = window_size
+        self.mlp_ratio = mlp_ratio
+        self.scale = upscale / self.start_unshuffle
+        self.upsampler = upsampler
+        self.img_size = img_size
+        self.img_range = img_range
+        self.resi_connection = resi_connection
+        self.supports_fp16 = False  # Too much weirdness to support this at the moment
+        self.supports_bfp16 = True
+        self.min_size_restriction = 16
+        self.img_range = img_range
+        if in_chans == 3:
+            rgb_mean = (0.4488, 0.4371, 0.4040)
+            self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
+        else:
+            self.mean = torch.zeros(1, 1, 1, 1)
+        self.upscale = upscale
+        self.upsampler = upsampler
+        self.window_size = window_size
+        #####################################################################################################
+        ################################### 1, shallow feature extraction ###################################
+        self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
+        #####################################################################################################
+        ################################### 2, deep feature extraction ######################################
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.ape = ape
+        self.patch_norm = patch_norm
+        self.num_features = embed_dim
+        self.mlp_ratio = mlp_ratio
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=embed_dim,
+            embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None,
+        )
+        num_patches = self.patch_embed.num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+        # merge non-overlapping patches into image
+        self.patch_unembed = PatchUnEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=embed_dim,
+            embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None,
+        )
+        # absolute position embedding
+        if self.ape:
+            self.absolute_pos_embed = nn.Parameter(  # type: ignore
+                torch.zeros(1, num_patches, embed_dim)
+            )
+            trunc_normal_(self.absolute_pos_embed, std=0.02)
+        self.pos_drop = nn.Dropout(p=drop_rate)
+        # stochastic depth
+        dpr = [
+            x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
+        ]  # stochastic depth decay rule
+        # build Residual Swin Transformer blocks (RSTB)
+        self.layers = nn.ModuleList()
+        for i_layer in range(self.num_layers):
+            layer = RSTB(
+                dim=embed_dim,
+                input_resolution=(patches_resolution[0], patches_resolution[1]),
+                depth=depths[i_layer],
+                num_heads=num_heads[i_layer],
+                window_size=window_size,
+                mlp_ratio=self.mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate,
+                attn_drop=attn_drop_rate,
+                drop_path=dpr[
+                    sum(depths[:i_layer]) : sum(depths[: i_layer + 1])  # type: ignore
+                ],  # no impact on SR results
+                norm_layer=norm_layer,
+                downsample=None,
+                use_checkpoint=use_checkpoint,
+                img_size=img_size,
+                patch_size=patch_size,
+                resi_connection=resi_connection,
+            )
+            self.layers.append(layer)
+        self.norm = norm_layer(self.num_features)
+        # build the last conv layer in deep feature extraction
+        if resi_connection == "1conv":
+            self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
+        elif resi_connection == "3conv":
+            # to save parameters and memory
+            self.conv_after_body = nn.Sequential(
+                nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0),
+                nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1),
+            )
+        #####################################################################################################
+        ################################ 3, high quality image reconstruction ################################
+        if self.upsampler == "pixelshuffle":
+            # for classical SR
+            self.conv_before_upsample = nn.Sequential(
+                nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.upsample = Upsample(upscale, num_feat)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+        elif self.upsampler == "pixelshuffledirect":
+            # for lightweight SR (to save parameters)
+            self.upsample = UpsampleOneStep(
+                upscale,
+                embed_dim,
+                num_out_ch,
+                (patches_resolution[0], patches_resolution[1]),
+            )
+        elif self.upsampler == "nearest+conv":
+            # for real-world SR (less artifacts)
+            self.conv_before_upsample = nn.Sequential(
+                nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True)
+            )
+            self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            if self.upscale == 4:
+                self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            elif self.upscale == 8:
+                self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+                self.conv_up3 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+            self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+        else:
+            # for image denoising and JPEG compression artifact reduction
+            self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1)
+        self.apply(self._init_weights)
+        self.load_state_dict(self.state, strict=False)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=0.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+    @torch.jit.ignore  # type: ignore
+    def no_weight_decay(self):
+        return {"absolute_pos_embed"}
+    @torch.jit.ignore  # type: ignore
+    def no_weight_decay_keywords(self):
+        return {"relative_position_bias_table"}
+    def check_image_size(self, x):
+        _, _, h, w = x.size()
+        mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
+        mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
+        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect")
+        return x
+    def forward_features(self, x):
+        x_size = (x.shape[2], x.shape[3])
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x, x_size)
+        x = self.norm(x)  # B L C
+        x = self.patch_unembed(x, x_size)
+        return x
+    def forward(self, x):
+        H, W = x.shape[2:]
+        x = self.check_image_size(x)
+        self.mean = self.mean.type_as(x)
+        x = (x - self.mean) * self.img_range
+        if self.start_unshuffle > 1:
+            x = torch.nn.functional.pixel_unshuffle(x, self.start_unshuffle)
+        if self.upsampler == "pixelshuffle":
+            # for classical SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.conv_before_upsample(x)
+            x = self.conv_last(self.upsample(x))
+        elif self.upsampler == "pixelshuffledirect":
+            # for lightweight SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.upsample(x)
+        elif self.upsampler == "nearest+conv":
+            # for real-world SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.conv_before_upsample(x)
+            x = self.lrelu(
+                self.conv_up1(
+                    torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest")  # type: ignore
+                )
+            )
+            if self.upscale == 4:
+                x = self.lrelu(
+                    self.conv_up2(
+                        torch.nn.functional.interpolate(  # type: ignore
+                            x, scale_factor=2, mode="nearest"
+                        )
+                    )
+                )
+            elif self.upscale == 8:
+                x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
+                x = self.lrelu(self.conv_up3(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
+            x = self.conv_last(self.lrelu(self.conv_hr(x)))
+        else:
+            # for image denoising and JPEG compression artifact reduction
+            x_first = self.conv_first(x)
+            res = self.conv_after_body(self.forward_features(x_first)) + x_first
+            x = x + self.conv_last(res)
+        x = x / self.img_range + self.mean
+        return x[:, :, : H * self.upscale, : W * self.upscale]
+    def flops(self):
+        flops = 0
+        H, W = self.patches_resolution
+        flops += H * W * 3 * self.embed_dim * 9
+        flops += self.patch_embed.flops()
+        for i, layer in enumerate(self.layers):
+            flops += layer.flops()  # type: ignore
+        flops += H * W * 3 * self.embed_dim * self.embed_dim
+        flops += self.upsample.flops()  # type: ignore
+        return flops

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/__init__.py ADDED Viewed

File without changes

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/block.py ADDED Viewed

	@@ -0,0 +1,546 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+from __future__ import annotations
+from collections import OrderedDict
+try:
+    from typing import Literal
+except ImportError:
+    from typing_extensions import Literal
+import torch
+import torch.nn as nn
+####################
+# Basic blocks
+####################
+def act(act_type: str, inplace=True, neg_slope=0.2, n_prelu=1):
+    # helper selecting activation
+    # neg_slope: for leakyrelu and init of prelu
+    # n_prelu: for p_relu num_parameters
+    act_type = act_type.lower()
+    if act_type == "relu":
+        layer = nn.ReLU(inplace)
+    elif act_type == "leakyrelu":
+        layer = nn.LeakyReLU(neg_slope, inplace)
+    elif act_type == "prelu":
+        layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope)
+    else:
+        raise NotImplementedError(
+            "activation layer [{:s}] is not found".format(act_type)
+        )
+    return layer
+def norm(norm_type: str, nc: int):
+    # helper selecting normalization layer
+    norm_type = norm_type.lower()
+    if norm_type == "batch":
+        layer = nn.BatchNorm2d(nc, affine=True)
+    elif norm_type == "instance":
+        layer = nn.InstanceNorm2d(nc, affine=False)
+    else:
+        raise NotImplementedError(
+            "normalization layer [{:s}] is not found".format(norm_type)
+        )
+    return layer
+def pad(pad_type: str, padding):
+    # helper selecting padding layer
+    # if padding is 'zero', do by conv layers
+    pad_type = pad_type.lower()
+    if padding == 0:
+        return None
+    if pad_type == "reflect":
+        layer = nn.ReflectionPad2d(padding)
+    elif pad_type == "replicate":
+        layer = nn.ReplicationPad2d(padding)
+    else:
+        raise NotImplementedError(
+            "padding layer [{:s}] is not implemented".format(pad_type)
+        )
+    return layer
+def get_valid_padding(kernel_size, dilation):
+    kernel_size = kernel_size + (kernel_size - 1) * (dilation - 1)
+    padding = (kernel_size - 1) // 2
+    return padding
+class ConcatBlock(nn.Module):
+    # Concat the output of a submodule to its input
+    def __init__(self, submodule):
+        super(ConcatBlock, self).__init__()
+        self.sub = submodule
+    def forward(self, x):
+        output = torch.cat((x, self.sub(x)), dim=1)
+        return output
+    def __repr__(self):
+        tmpstr = "Identity .. \n|"
+        modstr = self.sub.__repr__().replace("\n", "\n|")
+        tmpstr = tmpstr + modstr
+        return tmpstr
+class ShortcutBlock(nn.Module):
+    # Elementwise sum the output of a submodule to its input
+    def __init__(self, submodule):
+        super(ShortcutBlock, self).__init__()
+        self.sub = submodule
+    def forward(self, x):
+        output = x + self.sub(x)
+        return output
+    def __repr__(self):
+        tmpstr = "Identity + \n|"
+        modstr = self.sub.__repr__().replace("\n", "\n|")
+        tmpstr = tmpstr + modstr
+        return tmpstr
+class ShortcutBlockSPSR(nn.Module):
+    # Elementwise sum the output of a submodule to its input
+    def __init__(self, submodule):
+        super(ShortcutBlockSPSR, self).__init__()
+        self.sub = submodule
+    def forward(self, x):
+        return x, self.sub
+    def __repr__(self):
+        tmpstr = "Identity + \n|"
+        modstr = self.sub.__repr__().replace("\n", "\n|")
+        tmpstr = tmpstr + modstr
+        return tmpstr
+def sequential(*args):
+    # Flatten Sequential. It unwraps nn.Sequential.
+    if len(args) == 1:
+        if isinstance(args[0], OrderedDict):
+            raise NotImplementedError("sequential does not support OrderedDict input.")
+        return args[0]  # No sequential is needed.
+    modules = []
+    for module in args:
+        if isinstance(module, nn.Sequential):
+            for submodule in module.children():
+                modules.append(submodule)
+        elif isinstance(module, nn.Module):
+            modules.append(module)
+    return nn.Sequential(*modules)
+# Layer orderings accepted by the `mode` argument of conv_block and the blocks built on it.
+ConvMode = Literal["CNA", "NAC", "CNAC"]
+# 2x2x2 Conv Block
+def conv_block_2c2(
+    in_nc,
+    out_nc,
+    act_type="relu",
+):
+    return sequential(
+        nn.Conv2d(in_nc, out_nc, kernel_size=2, padding=1),
+        nn.Conv2d(out_nc, out_nc, kernel_size=2, padding=0),
+        act(act_type) if act_type else None,
+    )
+def conv_block(
+    in_nc: int,
+    out_nc: int,
+    kernel_size,
+    stride=1,
+    dilation=1,
+    groups=1,
+    bias=True,
+    pad_type="zero",
+    norm_type: str | None = None,
+    act_type: str | None = "relu",
+    mode: ConvMode = "CNA",
+    c2x2=False,
+):
+    """
+    Conv layer with padding, normalization, activation
+    mode: CNA --> Conv -> Norm -> Act
+        NAC --> Norm -> Act --> Conv (Identity Mappings in Deep Residual Networks, ECCV16)
+    """
+    if c2x2:
+        return conv_block_2c2(in_nc, out_nc, act_type=act_type)
+    assert mode in ("CNA", "NAC", "CNAC"), "Wrong conv mode [{:s}]".format(mode)
+    padding = get_valid_padding(kernel_size, dilation)
+    p = pad(pad_type, padding) if pad_type and pad_type != "zero" else None
+    padding = padding if pad_type == "zero" else 0
+    c = nn.Conv2d(
+        in_nc,
+        out_nc,
+        kernel_size=kernel_size,
+        stride=stride,
+        padding=padding,
+        dilation=dilation,
+        bias=bias,
+        groups=groups,
+    )
+    a = act(act_type) if act_type else None
+    if mode in ("CNA", "CNAC"):
+        n = norm(norm_type, out_nc) if norm_type else None
+        return sequential(p, c, n, a)
+    elif mode == "NAC":
+        if norm_type is None and act_type is not None:
+            a = act(act_type, inplace=False)
+            # Important!
+            # input----ReLU(inplace)----Conv--+----output
+            #        |________________________|
+            # inplace ReLU will modify the input, therefore wrong output
+        n = norm(norm_type, in_nc) if norm_type else None
+        return sequential(n, a, p, c)
+    else:
+        assert False, f"Invalid conv mode {mode}"
+####################
+# Useful blocks
+####################
+class ResNetBlock(nn.Module):
+    """
+    ResNet Block, 3-3 style
+    with extra residual scaling used in EDSR
+    (Enhanced Deep Residual Networks for Single Image Super-Resolution, CVPRW 17)
+    """
+    def __init__(
+        self,
+        in_nc,
+        mid_nc,
+        out_nc,
+        kernel_size=3,
+        stride=1,
+        dilation=1,
+        groups=1,
+        bias=True,
+        pad_type="zero",
+        norm_type=None,
+        act_type="relu",
+        mode: ConvMode = "CNA",
+        res_scale=1,
+    ):
+        super(ResNetBlock, self).__init__()
+        conv0 = conv_block(
+            in_nc,
+            mid_nc,
+            kernel_size,
+            stride,
+            dilation,
+            groups,
+            bias,
+            pad_type,
+            norm_type,
+            act_type,
+            mode,
+        )
+        if mode == "CNA":
+            act_type = None
+        if mode == "CNAC":  # Residual path: |-CNAC-|
+            act_type = None
+            norm_type = None
+        conv1 = conv_block(
+            mid_nc,
+            out_nc,
+            kernel_size,
+            stride,
+            dilation,
+            groups,
+            bias,
+            pad_type,
+            norm_type,
+            act_type,
+            mode,
+        )
+        # if in_nc != out_nc:
+        #     self.project = conv_block(in_nc, out_nc, 1, stride, dilation, 1, bias, pad_type, \
+        #         None, None)
+        #     print('Need a projecter in ResNetBlock.')
+        # else:
+        #     self.project = lambda x:x
+        self.res = sequential(conv0, conv1)
+        self.res_scale = res_scale
+    def forward(self, x):
+        res = self.res(x).mul(self.res_scale)
+        return x + res
+class RRDB(nn.Module):
+    """
+    Residual in Residual Dense Block
+    (ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks)
+    """
+    def __init__(
+        self,
+        nf,
+        kernel_size=3,
+        gc=32,
+        stride=1,
+        bias: bool = True,
+        pad_type="zero",
+        norm_type=None,
+        act_type="leakyrelu",
+        mode: ConvMode = "CNA",
+        _convtype="Conv2D",
+        _spectral_norm=False,
+        plus=False,
+        c2x2=False,
+    ):
+        super(RRDB, self).__init__()
+        self.RDB1 = ResidualDenseBlock_5C(
+            nf,
+            kernel_size,
+            gc,
+            stride,
+            bias,
+            pad_type,
+            norm_type,
+            act_type,
+            mode,
+            plus=plus,
+            c2x2=c2x2,
+        )
+        self.RDB2 = ResidualDenseBlock_5C(
+            nf,
+            kernel_size,
+            gc,
+            stride,
+            bias,
+            pad_type,
+            norm_type,
+            act_type,
+            mode,
+            plus=plus,
+            c2x2=c2x2,
+        )
+        self.RDB3 = ResidualDenseBlock_5C(
+            nf,
+            kernel_size,
+            gc,
+            stride,
+            bias,
+            pad_type,
+            norm_type,
+            act_type,
+            mode,
+            plus=plus,
+            c2x2=c2x2,
+        )
+    def forward(self, x):
+        out = self.RDB1(x)
+        out = self.RDB2(out)
+        out = self.RDB3(out)
+        return out * 0.2 + x
+class ResidualDenseBlock_5C(nn.Module):
+    """
+    Residual Dense Block
+    style: 5 convs
+    The core module of paper: (Residual Dense Network for Image Super-Resolution, CVPR 18)
+    Modified options that can be used:
+        - "Partial Convolution based Padding" arXiv:1811.11718
+        - "Spectral normalization" arXiv:1802.05957
+        - "ICASSP 2020 - ESRGAN+ : Further Improving ESRGAN" N. C.
+            {Rakotonirina} and A. {Rasoanaivo}
+    Args:
+        nf (int): Channel number of intermediate features (num_feat).
+        gc (int): Channels for each growth (num_grow_ch: growth channel,
+            i.e. intermediate channels).
+        convtype (str): the type of convolution to use. Default: 'Conv2D'
+        gaussian_noise (bool): enable the ESRGAN+ gaussian noise (no new
+            trainable parameters)
+        plus (bool): enable the additional residual paths from ESRGAN+
+            (adds trainable parameters)
+    """
+    def __init__(
+        self,
+        nf=64,
+        kernel_size=3,
+        gc=32,
+        stride=1,
+        bias: bool = True,
+        pad_type="zero",
+        norm_type=None,
+        act_type="leakyrelu",
+        mode: ConvMode = "CNA",
+        plus=False,
+        c2x2=False,
+    ):
+        super(ResidualDenseBlock_5C, self).__init__()
+        ## +
+        self.conv1x1 = conv1x1(nf, gc) if plus else None
+        ## +
+        self.conv1 = conv_block(
+            nf,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+            c2x2=c2x2,
+        )
+        self.conv2 = conv_block(
+            nf + gc,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+            c2x2=c2x2,
+        )
+        self.conv3 = conv_block(
+            nf + 2 * gc,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+            c2x2=c2x2,
+        )
+        self.conv4 = conv_block(
+            nf + 3 * gc,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+            c2x2=c2x2,
+        )
+        if mode == "CNA":
+            last_act = None
+        else:
+            last_act = act_type
+        self.conv5 = conv_block(
+            nf + 4 * gc,
+            nf,
+            3,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=last_act,
+            mode=mode,
+            c2x2=c2x2,
+        )
+    def forward(self, x):
+        x1 = self.conv1(x)
+        x2 = self.conv2(torch.cat((x, x1), 1))
+        if self.conv1x1:
+            # pylint: disable=not-callable
+            x2 = x2 + self.conv1x1(x)  # +
+        x3 = self.conv3(torch.cat((x, x1, x2), 1))
+        x4 = self.conv4(torch.cat((x, x1, x2, x3), 1))
+        if self.conv1x1:
+            x4 = x4 + x2  # +
+        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
+        return x5 * 0.2 + x
+def conv1x1(in_planes, out_planes, stride=1):
+    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+####################
+# Upsampler
+####################
+def pixelshuffle_block(
+    in_nc: int,
+    out_nc: int,
+    upscale_factor=2,
+    kernel_size=3,
+    stride=1,
+    bias=True,
+    pad_type="zero",
+    norm_type: str | None = None,
+    act_type="relu",
+):
+    """
+    Pixel shuffle layer
+    (Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional
+    Neural Network, CVPR17)
+    """
+    conv = conv_block(
+        in_nc,
+        out_nc * (upscale_factor**2),
+        kernel_size,
+        stride,
+        bias=bias,
+        pad_type=pad_type,
+        norm_type=None,
+        act_type=None,
+    )
+    pixel_shuffle = nn.PixelShuffle(upscale_factor)
+    n = norm(norm_type, out_nc) if norm_type else None
+    a = act(act_type) if act_type else None
+    return sequential(conv, pixel_shuffle, n, a)
+def upconv_block(
+    in_nc: int,
+    out_nc: int,
+    upscale_factor=2,
+    kernel_size=3,
+    stride=1,
+    bias=True,
+    pad_type="zero",
+    norm_type: str | None = None,
+    act_type="relu",
+    mode="nearest",
+    c2x2=False,
+):
+    # Up conv
+    # described in https://distill.pub/2016/deconv-checkerboard/
+    upsample = nn.Upsample(scale_factor=upscale_factor, mode=mode)
+    conv = conv_block(
+        in_nc,
+        out_nc,
+        kernel_size,
+        stride,
+        bias=bias,
+        pad_type=pad_type,
+        norm_type=norm_type,
+        act_type=act_type,
+        c2x2=c2x2,
+    )
+    return sequential(upsample, conv)

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/LICENSE-GFPGAN ADDED Viewed

	@@ -0,0 +1,351 @@

+Tencent is pleased to support the open source community by making GFPGAN available.
+Copyright (C) 2021 THL A29 Limited, a Tencent company.  All rights reserved.
+GFPGAN is licensed under the Apache License Version 2.0 except for the third-party components listed below.
+Terms of the Apache License Version 2.0:
+---------------------------------------------
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+1. Definitions.
+“License” shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
+“Licensor” shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
+“Legal Entity” shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
+“You” (or “Your”) shall mean an individual or Legal Entity exercising permissions granted by this License.
+“Source” form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
+“Object” form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
+“Work” shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
+“Derivative Works” shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
+“Contribution” shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, “submitted” means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as “Not a Contribution.”
+“Contributor” shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
+2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
+3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
+4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
+You must give any other recipients of the Work or Derivative Works a copy of this License; and
+You must cause any modified files to carry prominent notices stating that You changed the files; and
+You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
+If the Work includes a “NOTICE” text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
+You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
+5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
+6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
+8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
+9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
+END OF TERMS AND CONDITIONS
+Other dependencies and licenses:
+Open Source Software licensed under the Apache 2.0 license and Other Licenses of the Third-Party Components therein:
+---------------------------------------------
+1. basicsr
+Copyright 2018-2020 BasicSR Authors
+This BasicSR project is released under the Apache 2.0 license.
+A copy of Apache 2.0 is included in this file.
+StyleGAN2
+The codes are modified from the repository stylegan2-pytorch. Many thanks to the author - Kim Seonghyeon 😊 for translating from the official TensorFlow codes to PyTorch ones. Here is the license of stylegan2-pytorch.
+The official repository is https://github.com/NVlabs/stylegan2, and here is the NVIDIA license.
+DFDNet
+The codes are largely modified from the repository DFDNet. Their license is Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
+Terms of the Nvidia License:
+---------------------------------------------
+1. Definitions
+"Licensor" means any person or entity that distributes its Work.
+"Software" means the original work of authorship made available under
+this License.
+"Work" means the Software and any additions to or derivative works of
+the Software that are made available under this License.
+"Nvidia Processors" means any central processing unit (CPU), graphics
+processing unit (GPU), field-programmable gate array (FPGA),
+application-specific integrated circuit (ASIC) or any combination
+thereof designed, made, sold, or provided by Nvidia or its affiliates.
+The terms "reproduce," "reproduction," "derivative works," and
+"distribution" have the meaning as provided under U.S. copyright law;
+provided, however, that for the purposes of this License, derivative
+works shall not include works that remain separable from, or merely
+link (or bind by name) to the interfaces of, the Work.
+Works, including the Software, are "made available" under this License
+by including in or with the Work either (a) a copyright notice
+referencing the applicability of this License to the Work, or (b) a
+copy of this License.
+2. License Grants
+    2.1 Copyright Grant. Subject to the terms and conditions of this
+    License, each Licensor grants to you a perpetual, worldwide,
+    non-exclusive, royalty-free, copyright license to reproduce,
+    prepare derivative works of, publicly display, publicly perform,
+    sublicense and distribute its Work and any resulting derivative
+    works in any form.
+3. Limitations
+    3.1 Redistribution. You may reproduce or distribute the Work only
+    if (a) you do so under this License, (b) you include a complete
+    copy of this License with your distribution, and (c) you retain
+    without modification any copyright, patent, trademark, or
+    attribution notices that are present in the Work.
+    3.2 Derivative Works. You may specify that additional or different
+    terms apply to the use, reproduction, and distribution of your
+    derivative works of the Work ("Your Terms") only if (a) Your Terms
+    provide that the use limitation in Section 3.3 applies to your
+    derivative works, and (b) you identify the specific derivative
+    works that are subject to Your Terms. Notwithstanding Your Terms,
+    this License (including the redistribution requirements in Section
+    3.1) will continue to apply to the Work itself.
+    3.3 Use Limitation. The Work and any derivative works thereof only
+    may be used or intended for use non-commercially. The Work or
+    derivative works thereof may be used or intended for use by Nvidia
+    or its affiliates commercially or non-commercially. As used herein,
+    "non-commercially" means for research or evaluation purposes only.
+    3.4 Patent Claims. If you bring or threaten to bring a patent claim
+    against any Licensor (including any claim, cross-claim or
+    counterclaim in a lawsuit) to enforce any patents that you allege
+    are infringed by any Work, then your rights under this License from
+    such Licensor (including the grants in Sections 2.1 and 2.2) will
+    terminate immediately.
+    3.5 Trademarks. This License does not grant any rights to use any
+    Licensor's or its affiliates' names, logos, or trademarks, except
+    as necessary to reproduce the notices described in this License.
+    3.6 Termination. If you violate any term of this License, then your
+    rights under this License (including the grants in Sections 2.1 and
+    2.2) will terminate immediately.
+4. Disclaimer of Warranty.
+THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
+NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
+THIS LICENSE.
+5. Limitation of Liability.
+EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
+THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
+SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
+INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
+OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
+(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
+LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
+COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGES.
+MIT License
+Copyright (c) 2019 Kim Seonghyeon
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+Open Source Software licensed under the BSD 3-Clause license:
+---------------------------------------------
+1. torchvision
+Copyright (c) Soumith Chintala 2016,
+All rights reserved.
+2. torch
+Copyright (c) 2016-     Facebook, Inc            (Adam Paszke)
+Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala)
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU                      (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
+Copyright (c) 2006      Idiap Research Institute (Samy Bengio)
+Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
+Terms of the BSD 3-Clause License:
+---------------------------------------------
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+Open Source Software licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
+---------------------------------------------
+1. numpy
+Copyright (c) 2005-2020, NumPy Developers.
+All rights reserved.
+A copy of BSD 3-Clause License is included in this file.
+The NumPy repository and source distributions bundle several libraries that are
+compatibly licensed.  We list these here.
+Name: Numpydoc
+Files: doc/sphinxext/numpydoc/*
+License: BSD-2-Clause
+  For details, see doc/sphinxext/LICENSE.txt
+Name: scipy-sphinx-theme
+Files: doc/scipy-sphinx-theme/*
+License: BSD-3-Clause AND PSF-2.0 AND Apache-2.0
+  For details, see doc/scipy-sphinx-theme/LICENSE.txt
+Name: lapack-lite
+Files: numpy/linalg/lapack_lite/*
+License: BSD-3-Clause
+  For details, see numpy/linalg/lapack_lite/LICENSE.txt
+Name: tempita
+Files: tools/npy_tempita/*
+License: MIT
+  For details, see tools/npy_tempita/license.txt
+Name: dragon4
+Files: numpy/core/src/multiarray/dragon4.c
+License: MIT
+  For license text, see numpy/core/src/multiarray/dragon4.c
+Open Source Software licensed under the MIT license:
+---------------------------------------------
+1. facexlib
+Copyright (c) 2020 Xintao Wang
+2. opencv-python
+Copyright (c) Olli-Pekka Heinisuo
+Please note that only files in cv2 package are used.
+Terms of the MIT License:
+---------------------------------------------
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+Open Source Software licensed under the MIT license and Other Licenses of the Third-Party Components therein:
+---------------------------------------------
+1. tqdm
+Copyright (c) 2013 noamraph
+`tqdm` is a product of collaborative work.
+Unless otherwise stated, all authors (see commit logs) retain copyright
+for their respective work, and release the work under the MIT licence
+(text below).
+Exceptions or notable authors are listed below
+in reverse chronological order:
+* files: *
+  MPLv2.0 2015-2020 (c) Casper da Costa-Luis
+  [casperdcl](https://github.com/casperdcl).
+* files: tqdm/_tqdm.py
+  MIT 2016 (c) [PR #96] on behalf of Google Inc.
+* files: tqdm/_tqdm.py setup.py README.rst MANIFEST.in .gitignore
+  MIT 2013 (c) Noam Yorav-Raphael, original author.
+[PR #96]: https://github.com/tqdm/tqdm/pull/96
+Mozilla Public Licence (MPL) v. 2.0 - Exhibit A
+-----------------------------------------------
+This Source Code Form is subject to the terms of the
+Mozilla Public License, v. 2.0.
+If a copy of the MPL was not distributed with this file,
+You can obtain one at https://mozilla.org/MPL/2.0/.
+MIT License (MIT)
+-----------------
+Copyright (c) 2013 noamraph
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/LICENSE-RestoreFormer ADDED Viewed

	@@ -0,0 +1,351 @@

+Tencent is pleased to support the open source community by making GFPGAN available.
+Copyright (C) 2021 THL A29 Limited, a Tencent company.  All rights reserved.
+GFPGAN is licensed under the Apache License Version 2.0 except for the third-party components listed below.
+Terms of the Apache License Version 2.0:
+---------------------------------------------
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+1. Definitions.
+“License” shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
+“Licensor” shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
+“Legal Entity” shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
+“You” (or “Your”) shall mean an individual or Legal Entity exercising permissions granted by this License.
+“Source” form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
+“Object” form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
+“Work” shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
+“Derivative Works” shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
+“Contribution” shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, “submitted” means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as “Not a Contribution.”
+“Contributor” shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
+2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
+3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
+4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
+You must give any other recipients of the Work or Derivative Works a copy of this License; and
+You must cause any modified files to carry prominent notices stating that You changed the files; and
+You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
+If the Work includes a “NOTICE” text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
+You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
+5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
+6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
+8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
+9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
+END OF TERMS AND CONDITIONS
+Other dependencies and licenses:
+Open Source Software licensed under the Apache 2.0 license and Other Licenses of the Third-Party Components therein:
+---------------------------------------------
+1. basicsr
+Copyright 2018-2020 BasicSR Authors
+This BasicSR project is released under the Apache 2.0 license.
+A copy of Apache 2.0 is included in this file.
+StyleGAN2
+The codes are modified from the repository stylegan2-pytorch. Many thanks to the author - Kim Seonghyeon 😊 for translating from the official TensorFlow codes to PyTorch ones. Here is the license of stylegan2-pytorch.
+The official repository is https://github.com/NVlabs/stylegan2, and here is the NVIDIA license.
+DFDNet
+The codes are largely modified from the repository DFDNet. Their license is Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
+Terms of the Nvidia License:
+---------------------------------------------
+1. Definitions
+"Licensor" means any person or entity that distributes its Work.
+"Software" means the original work of authorship made available under
+this License.
+"Work" means the Software and any additions to or derivative works of
+the Software that are made available under this License.
+"Nvidia Processors" means any central processing unit (CPU), graphics
+processing unit (GPU), field-programmable gate array (FPGA),
+application-specific integrated circuit (ASIC) or any combination
+thereof designed, made, sold, or provided by Nvidia or its affiliates.
+The terms "reproduce," "reproduction," "derivative works," and
+"distribution" have the meaning as provided under U.S. copyright law;
+provided, however, that for the purposes of this License, derivative
+works shall not include works that remain separable from, or merely
+link (or bind by name) to the interfaces of, the Work.
+Works, including the Software, are "made available" under this License
+by including in or with the Work either (a) a copyright notice
+referencing the applicability of this License to the Work, or (b) a
+copy of this License.
+2. License Grants
+    2.1 Copyright Grant. Subject to the terms and conditions of this
+    License, each Licensor grants to you a perpetual, worldwide,
+    non-exclusive, royalty-free, copyright license to reproduce,
+    prepare derivative works of, publicly display, publicly perform,
+    sublicense and distribute its Work and any resulting derivative
+    works in any form.
+3. Limitations
+    3.1 Redistribution. You may reproduce or distribute the Work only
+    if (a) you do so under this License, (b) you include a complete
+    copy of this License with your distribution, and (c) you retain
+    without modification any copyright, patent, trademark, or
+    attribution notices that are present in the Work.
+    3.2 Derivative Works. You may specify that additional or different
+    terms apply to the use, reproduction, and distribution of your
+    derivative works of the Work ("Your Terms") only if (a) Your Terms
+    provide that the use limitation in Section 3.3 applies to your
+    derivative works, and (b) you identify the specific derivative
+    works that are subject to Your Terms. Notwithstanding Your Terms,
+    this License (including the redistribution requirements in Section
+    3.1) will continue to apply to the Work itself.
+    3.3 Use Limitation. The Work and any derivative works thereof only
+    may be used or intended for use non-commercially. The Work or
+    derivative works thereof may be used or intended for use by Nvidia
+    or its affiliates commercially or non-commercially. As used herein,
+    "non-commercially" means for research or evaluation purposes only.
+    3.4 Patent Claims. If you bring or threaten to bring a patent claim
+    against any Licensor (including any claim, cross-claim or
+    counterclaim in a lawsuit) to enforce any patents that you allege
+    are infringed by any Work, then your rights under this License from
+    such Licensor (including the grants in Sections 2.1 and 2.2) will
+    terminate immediately.
+    3.5 Trademarks. This License does not grant any rights to use any
+    Licensor's or its affiliates' names, logos, or trademarks, except
+    as necessary to reproduce the notices described in this License.
+    3.6 Termination. If you violate any term of this License, then your
+    rights under this License (including the grants in Sections 2.1 and
+    2.2) will terminate immediately.
+4. Disclaimer of Warranty.
+THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
+NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
+THIS LICENSE.
+5. Limitation of Liability.
+EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
+THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
+SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
+INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
+OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
+(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
+LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
+COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGES.
+MIT License
+Copyright (c) 2019 Kim Seonghyeon
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+Open Source Software licensed under the BSD 3-Clause license:
+---------------------------------------------
+1. torchvision
+Copyright (c) Soumith Chintala 2016,
+All rights reserved.
+2. torch
+Copyright (c) 2016-     Facebook, Inc            (Adam Paszke)
+Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala)
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU                      (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
+Copyright (c) 2006      Idiap Research Institute (Samy Bengio)
+Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
+Terms of the BSD 3-Clause License:
+---------------------------------------------
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+Open Source Software licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
+---------------------------------------------
+1. numpy
+Copyright (c) 2005-2020, NumPy Developers.
+All rights reserved.
+A copy of BSD 3-Clause License is included in this file.
+The NumPy repository and source distributions bundle several libraries that are
+compatibly licensed.  We list these here.
+Name: Numpydoc
+Files: doc/sphinxext/numpydoc/*
+License: BSD-2-Clause
+  For details, see doc/sphinxext/LICENSE.txt
+Name: scipy-sphinx-theme
+Files: doc/scipy-sphinx-theme/*
+License: BSD-3-Clause AND PSF-2.0 AND Apache-2.0
+  For details, see doc/scipy-sphinx-theme/LICENSE.txt
+Name: lapack-lite
+Files: numpy/linalg/lapack_lite/*
+License: BSD-3-Clause
+  For details, see numpy/linalg/lapack_lite/LICENSE.txt
+Name: tempita
+Files: tools/npy_tempita/*
+License: MIT
+  For details, see tools/npy_tempita/license.txt
+Name: dragon4
+Files: numpy/core/src/multiarray/dragon4.c
+License: MIT
+  For license text, see numpy/core/src/multiarray/dragon4.c
+Open Source Software licensed under the MIT license:
+---------------------------------------------
+1. facexlib
+Copyright (c) 2020 Xintao Wang
+2. opencv-python
+Copyright (c) Olli-Pekka Heinisuo
+Please note that only files in cv2 package are used.
+Terms of the MIT License:
+---------------------------------------------
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+Open Source Software licensed under the MIT license and Other Licenses of the Third-Party Components therein:
+---------------------------------------------
+1. tqdm
+Copyright (c) 2013 noamraph
+`tqdm` is a product of collaborative work.
+Unless otherwise stated, all authors (see commit logs) retain copyright
+for their respective work, and release the work under the MIT licence
+(text below).
+Exceptions or notable authors are listed below
+in reverse chronological order:
+* files: *
+  MPLv2.0 2015-2020 (c) Casper da Costa-Luis
+  [casperdcl](https://github.com/casperdcl).
+* files: tqdm/_tqdm.py
+  MIT 2016 (c) [PR #96] on behalf of Google Inc.
+* files: tqdm/_tqdm.py setup.py README.rst MANIFEST.in .gitignore
+  MIT 2013 (c) Noam Yorav-Raphael, original author.
+[PR #96]: https://github.com/tqdm/tqdm/pull/96
+Mozilla Public Licence (MPL) v. 2.0 - Exhibit A
+-----------------------------------------------
+This Source Code Form is subject to the terms of the
+Mozilla Public License, v. 2.0.
+If a copy of the MPL was not distributed with this file,
+You can obtain one at https://mozilla.org/MPL/2.0/.
+MIT License (MIT)
+-----------------
+Copyright (c) 2013 noamraph
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/LICENSE-codeformer ADDED Viewed

	@@ -0,0 +1,35 @@

+S-Lab License 1.0
+Copyright 2022 S-Lab
+Redistribution and use for non-commercial purpose in source and
+binary forms, with or without modification, are permitted provided
+that the following conditions are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+In the event that redistribution and/or use for commercial purpose in
+source or binary forms, with or without modification is required,
+please contact the contributor(s) of the work.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/arcface_arch.py ADDED Viewed

	@@ -0,0 +1,265 @@

+import torch.nn as nn
def conv3x3(inplanes, outplanes, stride=1):
    """Build a bias-free 3x3 convolution padded by one pixel on every side.

    Args:
        inplanes (int): Number of input channels.
        outplanes (int): Number of output channels.
        stride (int): Convolution stride. Default: 1.

    Returns:
        nn.Conv2d: The configured convolution layer.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(inplanes, outplanes, **conv_options)
class BasicBlock(nn.Module):
    """Plain two-convolution residual block (conv-BN-ReLU, conv-BN, add, ReLU).

    Args:
        inplanes (int): Number of input channels.
        planes (int): Number of output channels.
        stride (int): Stride of the first convolution. Default: 1.
        downsample (nn.Module): Optional module applied to the input before
            it is added back as the shortcut. Default: None.
    """

    expansion = 1  # ratio between the block's output channels and ``planes``

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # The shortcut is the raw input unless a downsample module was given.
        shortcut = x if self.downsample is None else self.downsample(x)

        features = self.relu(self.bn1(self.conv1(x)))
        features = self.bn2(self.conv2(features))

        features += shortcut
        return self.relu(features)
class IRBlock(nn.Module):
    """Improved residual (IR) block: BN-conv-BN-PReLU-conv-BN with optional SE.

    Args:
        inplanes (int): Number of input channels.
        planes (int): Number of output channels.
        stride (int): Stride of the second convolution. Default: 1.
        downsample (nn.Module): Optional module applied to the input before
            it is added back as the shortcut. Default: None.
        use_se (bool): Whether to apply an SEBlock (squeeze and excitation)
            to the features before the shortcut is added. Default: True.
    """

    expansion = 1  # ratio between the block's output channels and ``planes``

    def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True):
        super().__init__()
        self.bn0 = nn.BatchNorm2d(inplanes)
        self.conv1 = conv3x3(inplanes, inplanes)
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.prelu = nn.PReLU()
        self.conv2 = conv3x3(inplanes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride
        self.use_se = use_se
        if use_se:
            self.se = SEBlock(planes)

    def forward(self, x):
        features = self.conv1(self.bn0(x))
        features = self.prelu(self.bn1(features))
        features = self.bn2(self.conv2(features))
        if self.use_se:
            features = self.se(features)

        # The shortcut is the raw input unless a downsample module was given.
        shortcut = x if self.downsample is None else self.downsample(x)

        features += shortcut
        # The same PReLU instance (and its learned slope) is reused here.
        return self.prelu(features)
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 conv, 3x3 conv, then 1x1 expanding conv.

    Args:
        inplanes (int): Number of input channels.
        planes (int): Width of the two inner convolutions; the block outputs
            ``planes * expansion`` channels.
        stride (int): Stride of the 3x3 convolution. Default: 1.
        downsample (nn.Module): Optional module applied to the input before
            it is added back as the shortcut. Default: None.
    """

    expansion = 4  # ratio between the block's output channels and ``planes``

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        expanded = planes * self.expansion

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, expanded, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # The shortcut is the raw input unless a downsample module was given.
        shortcut = x if self.downsample is None else self.downsample(x)

        features = self.relu(self.bn1(self.conv1(x)))
        features = self.relu(self.bn2(self.conv2(features)))
        features = self.bn3(self.conv3(features))

        features += shortcut
        return self.relu(features)
class SEBlock(nn.Module):
    """Squeeze-and-excitation block that rescales each channel of its input.

    Args:
        channel (int): Number of input channels.
        reduction (int): Channel reduction ratio of the hidden layer.
            Default: 16.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        hidden = channel // reduction
        # Global average pooling: collapses each feature map to a single value.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden),
            nn.PReLU(),
            nn.Linear(hidden, channel),
            nn.Sigmoid(),
        )

    def forward(self, x):
        batch, channels, _height, _width = x.size()
        squeezed = self.avg_pool(x).view(batch, channels)
        # Per-channel gates in (0, 1), broadcast over the spatial dimensions.
        gates = self.fc(squeezed).view(batch, channels, 1, 1)
        return x * gates
class ResNetArcFace(nn.Module):
    """ArcFace with ResNet architectures.

    Ref: ArcFace: Additive Angular Margin Loss for Deep Face Recognition.

    The stem takes single-channel images. The final linear layer expects a
    flattened 512 x 8 x 8 feature map; with one 2x max-pool and three
    stride-2 stages that corresponds to 128 x 128 inputs.

    Args:
        block (str | type): Block used in the ArcFace architecture. Either the
            string ``"IRBlock"`` or a block class exposing an ``expansion``
            attribute and accepting
            ``(inplanes, planes, stride, downsample, use_se=...)``.
        layers (tuple(int)): Block numbers in each of the four stages.
        use_se (bool): Whether use the SEBlock (squeeze and excitation block).
            Default: True.

    Raises:
        ValueError: If ``block`` is a string other than ``"IRBlock"``.
    """

    def __init__(self, block, layers, use_se=True):
        # nn.Module must be initialised before any attribute is assigned.
        super().__init__()

        if isinstance(block, str):
            # Fail early with a clear message instead of an AttributeError on
            # ``block.expansion`` deep inside ``_make_layer``.
            if block != "IRBlock":
                raise ValueError(
                    f"Unsupported block name {block!r}; only 'IRBlock' can be "
                    "given as a string."
                )
            block = IRBlock

        self.inplanes = 64  # running input width, updated by _make_layer
        self.use_se = use_se

        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.prelu = nn.PReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.bn4 = nn.BatchNorm2d(512)
        self.dropout = nn.Dropout()
        self.fc5 = nn.Linear(512 * 8 * 8, 512)
        self.bn5 = nn.BatchNorm1d(512)

        # initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight)
            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, num_blocks, stride=1):
        """Stack ``num_blocks`` blocks; only the first one may change shape.

        Args:
            block (type): Block class to instantiate.
            planes (int): Channel width of the stage.
            num_blocks (int): Number of blocks in the stage.
            stride (int): Stride of the first block. Default: 1.

        Returns:
            nn.Sequential: The assembled stage.
        """
        downsample = None
        # A 1x1 conv + BN shortcut is needed whenever the first block changes
        # the spatial size or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = [
            block(self.inplanes, planes, stride, downsample, use_se=self.use_se)
        ]
        self.inplanes = planes
        for _ in range(1, num_blocks):
            layers.append(block(self.inplanes, planes, use_se=self.use_se))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.prelu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.bn4(x)
        x = self.dropout(x)
        # Flatten everything but the batch dimension for the linear head.
        x = x.view(x.size(0), -1)
        x = self.fc5(x)
        x = self.bn5(x)
        return x

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/codeformer.py ADDED Viewed

	@@ -0,0 +1,790 @@

+"""
+Modified from https://github.com/sczhou/CodeFormer
+VQGAN code, adapted from the original created by the Unleashing Transformers authors:
+https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py
+This version of the arch specifically was gathered from an old version of GFPGAN. If this is a problem, please contact me.
+"""
+import math
+from typing import Optional
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import logging as logger
+from torch import Tensor
class VectorQuantizer(nn.Module):
    """Nearest-neighbour codebook quantizer with a straight-through gradient.

    Args:
        codebook_size (int): Number of codebook entries.
        emb_dim (int): Dimensionality of each codebook entry.
        beta (float): Weight applied to the ``(z_q - sg[z])^2`` loss term.
    """

    def __init__(self, codebook_size, emb_dim, beta):
        super().__init__()
        self.codebook_size = codebook_size
        self.emb_dim = emb_dim
        self.beta = beta
        self.embedding = nn.Embedding(self.codebook_size, self.emb_dim)
        self.embedding.weight.data.uniform_(
            -1.0 / self.codebook_size, 1.0 / self.codebook_size
        )

    def forward(self, z):
        """Quantize ``z`` (batch, channel, height, width) against the codebook.

        Returns:
            tuple: ``(z_q, loss, stats)`` where ``z_q`` has the shape of the
            input, ``loss`` is a scalar and ``stats`` is a dict with the keys
            ``perplexity``, ``min_encodings``, ``min_encoding_indices``,
            ``min_encoding_scores`` and ``mean_distance``.
        """
        # Move channels last so every row of the flattened view is one vector.
        z = z.permute(0, 2, 3, 1).contiguous()
        flat = z.view(-1, self.emb_dim)
        codebook = self.embedding.weight

        # Squared distances via ||z - e||^2 = ||z||^2 + ||e||^2 - 2 z.e
        z_sq = (flat**2).sum(dim=1, keepdim=True)
        e_sq = (codebook**2).sum(1)
        distances = z_sq + e_sq - 2 * torch.matmul(flat, codebook.t())
        mean_distance = torch.mean(distances)

        # topk with largest=False returns the smallest distance and its index.
        nearest_dist, nearest_idx = torch.topk(distances, 1, dim=1, largest=False)
        # Map the distance to a confidence score: smaller distance, higher score.
        nearest_score = torch.exp(-nearest_dist / 10)

        one_hot = torch.zeros(nearest_idx.shape[0], self.codebook_size).to(z)
        one_hot.scatter_(1, nearest_idx, 1)

        # Select the chosen codebook rows and restore the channel-last shape.
        z_q = torch.matmul(one_hot, codebook).view(z.shape)

        encoder_term = torch.mean((z_q.detach() - z) ** 2)
        codebook_term = torch.mean((z_q - z.detach()) ** 2)
        loss = encoder_term + self.beta * codebook_term

        # Straight-through estimator: forward uses z_q, gradients flow to z.
        z_q = z + (z_q - z).detach()

        # Perplexity of the codebook usage within this batch.
        usage = torch.mean(one_hot, dim=0)
        perplexity = torch.exp(-torch.sum(usage * torch.log(usage + 1e-10)))

        # Back to (batch, channel, height, width).
        z_q = z_q.permute(0, 3, 1, 2).contiguous()

        stats = {
            "perplexity": perplexity,
            "min_encodings": one_hot,
            "min_encoding_indices": nearest_idx,
            "min_encoding_scores": nearest_score,
            "mean_distance": mean_distance,
        }
        return z_q, loss, stats

    def get_codebook_feat(self, indices, shape):
        """Look up codebook vectors for ``indices``.

        Args:
            indices (Tensor): Integer codebook indices; flattened to a column.
            shape (tuple | None): ``(batch, height, width, channel)`` used to
                reshape the result to channel-first layout, or ``None`` to
                keep the flat ``(num_indices, emb_dim)`` result.
        """
        column = indices.view(-1, 1)
        one_hot = torch.zeros(column.shape[0], self.codebook_size).to(column)
        one_hot.scatter_(1, column, 1)

        z_q = torch.matmul(one_hot.float(), self.embedding.weight)
        if shape is None:
            return z_q
        return z_q.view(shape).permute(0, 3, 1, 2).contiguous()
class GumbelQuantizer(nn.Module):
    """Codebook quantizer that selects entries with a Gumbel-softmax.

    Args:
        codebook_size (int): Number of codebook entries.
        emb_dim (int): Dimensionality of each codebook entry.
        num_hiddens (int): Channel count of the incoming feature map.
        straight_through (bool): Whether to use hard samples while training.
            Default: False.
        kl_weight (float): Weight of the KL term returned with the output.
            Default: 5e-4.
        temp_init (float): Gumbel-softmax temperature. Default: 1.0.
    """

    def __init__(
        self,
        codebook_size,
        emb_dim,
        num_hiddens,
        straight_through=False,
        kl_weight=5e-4,
        temp_init=1.0,
    ):
        super().__init__()
        self.codebook_size = codebook_size
        self.emb_dim = emb_dim
        self.straight_through = straight_through
        self.temperature = temp_init
        self.kl_weight = kl_weight
        # 1x1 conv turning the incoming features into per-entry logits.
        self.proj = nn.Conv2d(num_hiddens, codebook_size, 1)
        self.embed = nn.Embedding(codebook_size, emb_dim)

    def forward(self, z):
        """Return ``(z_q, kl_term, {"min_encoding_indices": indices})``."""
        # Outside of training the selection is always hard (one-hot).
        if self.training:
            hard = self.straight_through
        else:
            hard = True

        logits = self.proj(z)
        selection = F.gumbel_softmax(logits, tau=self.temperature, dim=1, hard=hard)
        z_q = torch.einsum("b n h w, n d -> b d h w", selection, self.embed.weight)

        # KL divergence of the softmax distribution to a uniform prior.
        probs = F.softmax(logits, dim=1)
        kl = torch.sum(probs * torch.log(probs * self.codebook_size + 1e-10), dim=1)
        diff = self.kl_weight * kl.mean()

        indices = selection.argmax(dim=1)
        return z_q, diff, {"min_encoding_indices": indices}
class Downsample(nn.Module):
    """Halve the spatial size with a stride-2 3x3 convolution.

    Args:
        in_channels (int): Number of input (and output) channels.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels, in_channels, kernel_size=3, stride=2, padding=0
        )

    def forward(self, x):
        # Zero-pad one pixel on the right and bottom only; the conv itself
        # applies no padding.
        padded = F.pad(x, (0, 1, 0, 1), mode="constant", value=0)
        return self.conv(padded)
class Upsample(nn.Module):
    """Double the spatial size by nearest-neighbour interpolation, then convolve.

    Args:
        in_channels (int): Number of input (and output) channels.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels, in_channels, kernel_size=3, stride=1, padding=1
        )

    def forward(self, x):
        enlarged = F.interpolate(x, scale_factor=2.0, mode="nearest")
        return self.conv(enlarged)
class AttnBlock(nn.Module):
    """Single-head self-attention over spatial positions, with a residual add.

    Args:
        in_channels (int): Number of input (and output) channels.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels

        self.norm = normalize(in_channels)
        # Query, key, value and output projections are all 1x1 convolutions.
        self.q = nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
        self.k = nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
        self.v = nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
        self.proj_out = nn.Conv2d(
            in_channels, in_channels, kernel_size=1, stride=1, padding=0
        )

    def forward(self, x):
        normed = self.norm(x)
        query = self.q(normed)
        key = self.k(normed)
        value = self.v(normed)

        batch, channels, height, width = query.shape
        positions = height * width

        # Attention weights: (batch, positions, positions), rows are queries.
        query = query.reshape(batch, channels, positions).permute(0, 2, 1)
        key = key.reshape(batch, channels, positions)
        weights = torch.bmm(query, key)
        weights = weights * (int(channels) ** (-0.5))
        weights = F.softmax(weights, dim=2)

        # Weighted sum of values; transposing puts the key axis first for bmm.
        value = value.reshape(batch, channels, positions)
        weights = weights.permute(0, 2, 1)
        attended = torch.bmm(value, weights)
        attended = attended.reshape(batch, channels, height, width)

        return x + self.proj_out(attended)
class Encoder(nn.Module):
    """Convolutional encoder built as a flat list of blocks applied in order.

    Args:
        in_channels (int): Channel count of the input.
        nf (int): Base channel width.
        out_channels (int): Channel count of the output.
        ch_mult (sequence of int): Channel multiplier per resolution level.
        num_res_blocks (int): Residual blocks per resolution level.
        resolution (int): Input resolution, used to decide where attention
            blocks are inserted.
        attn_resolutions (collection of int): Resolutions at which an
            attention block follows every residual block.
    """

    def __init__(
        self,
        in_channels,
        nf,
        out_channels,
        ch_mult,
        num_res_blocks,
        resolution,
        attn_resolutions,
    ):
        super().__init__()
        self.nf = nf
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.attn_resolutions = attn_resolutions

        curr_res = self.resolution
        # Each level's input multiplier is the previous level's output one.
        in_ch_mult = (1,) + tuple(ch_mult)
        last_level = self.num_resolutions - 1

        # initial convolution
        stages = [nn.Conv2d(in_channels, nf, kernel_size=3, stride=1, padding=1)]

        # residual and downsampling blocks, with optional attention
        for level, (mult_in, mult_out) in enumerate(zip(in_ch_mult, ch_mult)):
            width = nf * mult_in
            width_out = nf * mult_out
            for _ in range(self.num_res_blocks):
                stages.append(ResBlock(width, width_out))
                width = width_out
                if curr_res in attn_resolutions:
                    stages.append(AttnBlock(width))
            if level != last_level:
                stages.append(Downsample(width))
                curr_res = curr_res // 2

        # non-local attention block
        stages.append(ResBlock(width, width))  # type: ignore
        stages.append(AttnBlock(width))  # type: ignore
        stages.append(ResBlock(width, width))  # type: ignore

        # normalise and convert to latent size
        stages.append(normalize(width))  # type: ignore
        stages.append(
            nn.Conv2d(width, out_channels, kernel_size=3, stride=1, padding=1)  # type: ignore
        )

        self.blocks = nn.ModuleList(stages)

    def forward(self, x):
        for stage in self.blocks:
            x = stage(x)
        return x
+class Generator(nn.Module):
+    def __init__(self, nf, ch_mult, res_blocks, img_size, attn_resolutions, emb_dim):
+        super().__init__()
+        self.nf = nf
+        self.ch_mult = ch_mult
+        self.num_resolutions = len(self.ch_mult)
+        self.num_res_blocks = res_blocks
+        self.resolution = img_size
+        self.attn_resolutions = attn_resolutions
+        self.in_channels = emb_dim
+        self.out_channels = 3
+        block_in_ch = self.nf * self.ch_mult[-1]
+        curr_res = self.resolution // 2 ** (self.num_resolutions - 1)
+        blocks = []
+        # initial conv
+        blocks.append(
+            nn.Conv2d(self.in_channels, block_in_ch, kernel_size=3, stride=1, padding=1)
+        )
+        # non-local attention block
+        blocks.append(ResBlock(block_in_ch, block_in_ch))
+        blocks.append(AttnBlock(block_in_ch))
+        blocks.append(ResBlock(block_in_ch, block_in_ch))
+        for i in reversed(range(self.num_resolutions)):
+            block_out_ch = self.nf * self.ch_mult[i]
+            for _ in range(self.num_res_blocks):
+                blocks.append(ResBlock(block_in_ch, block_out_ch))
+                block_in_ch = block_out_ch
+                if curr_res in self.attn_resolutions:
+                    blocks.append(AttnBlock(block_in_ch))
+            if i != 0:
+                blocks.append(Upsample(block_in_ch))
+                curr_res = curr_res * 2
+        blocks.append(normalize(block_in_ch))
+        blocks.append(
+            nn.Conv2d(
+                block_in_ch, self.out_channels, kernel_size=3, stride=1, padding=1
+            )
+        )
+        self.blocks = nn.ModuleList(blocks)
+    def forward(self, x):
+        for block in self.blocks:
+            x = block(x)
+        return x
+class VQAutoEncoder(nn.Module):
+    def __init__(
+        self,
+        img_size,
+        nf,
+        ch_mult,
+        quantizer="nearest",
+        res_blocks=2,
+        attn_resolutions=[16],
+        codebook_size=1024,
+        emb_dim=256,
+        beta=0.25,
+        gumbel_straight_through=False,
+        gumbel_kl_weight=1e-8,
+        model_path=None,
+    ):
+        super().__init__()
+        self.in_channels = 3
+        self.nf = nf
+        self.n_blocks = res_blocks
+        self.codebook_size = codebook_size
+        self.embed_dim = emb_dim
+        self.ch_mult = ch_mult
+        self.resolution = img_size
+        self.attn_resolutions = attn_resolutions
+        self.quantizer_type = quantizer
+        self.encoder = Encoder(
+            self.in_channels,
+            self.nf,
+            self.embed_dim,
+            self.ch_mult,
+            self.n_blocks,
+            self.resolution,
+            self.attn_resolutions,
+        )
+        if self.quantizer_type == "nearest":
+            self.beta = beta  # 0.25
+            self.quantize = VectorQuantizer(
+                self.codebook_size, self.embed_dim, self.beta
+            )
+        elif self.quantizer_type == "gumbel":
+            self.gumbel_num_hiddens = emb_dim
+            self.straight_through = gumbel_straight_through
+            self.kl_weight = gumbel_kl_weight
+            self.quantize = GumbelQuantizer(
+                self.codebook_size,
+                self.embed_dim,
+                self.gumbel_num_hiddens,
+                self.straight_through,
+                self.kl_weight,
+            )
+        self.generator = Generator(
+            nf, ch_mult, res_blocks, img_size, attn_resolutions, emb_dim
+        )
+        if model_path is not None:
+            chkpt = torch.load(model_path, map_location="cpu")
+            if "params_ema" in chkpt:
+                self.load_state_dict(
+                    torch.load(model_path, map_location="cpu")["params_ema"]
+                )
+                logger.info(f"vqgan is loaded from: {model_path} [params_ema]")
+            elif "params" in chkpt:
+                self.load_state_dict(
+                    torch.load(model_path, map_location="cpu")["params"]
+                )
+                logger.info(f"vqgan is loaded from: {model_path} [params]")
+            else:
+                raise ValueError("Wrong params!")
+    def forward(self, x):
+        x = self.encoder(x)
+        quant, codebook_loss, quant_stats = self.quantize(x)
+        x = self.generator(quant)
+        return x, codebook_loss, quant_stats
+def calc_mean_std(feat, eps=1e-5):
+    """Calculate mean and std for adaptive_instance_normalization.
+    Args:
+        feat (Tensor): 4D tensor.
+        eps (float): A small value added to the variance to avoid
+            divide-by-zero. Default: 1e-5.
+    """
+    size = feat.size()
+    assert len(size) == 4, "The input feature should be 4D tensor."
+    b, c = size[:2]
+    feat_var = feat.view(b, c, -1).var(dim=2) + eps
+    feat_std = feat_var.sqrt().view(b, c, 1, 1)
+    feat_mean = feat.view(b, c, -1).mean(dim=2).view(b, c, 1, 1)
+    return feat_mean, feat_std
+def adaptive_instance_normalization(content_feat, style_feat):
+    """Adaptive instance normalization.
+    Adjust the reference features to have the similar color and illuminations
+    as those in the degradate features.
+    Args:
+        content_feat (Tensor): The reference feature.
+        style_feat (Tensor): The degradate features.
+    """
+    size = content_feat.size()
+    style_mean, style_std = calc_mean_std(style_feat)
+    content_mean, content_std = calc_mean_std(content_feat)
+    normalized_feat = (content_feat - content_mean.expand(size)) / content_std.expand(
+        size
+    )
+    return normalized_feat * style_std.expand(size) + style_mean.expand(size)
+class PositionEmbeddingSine(nn.Module):
+    """
+    This is a more standard version of the position embedding, very similar to the one
+    used by the Attention is all you need paper, generalized to work on images.
+    """
+    def __init__(
+        self, num_pos_feats=64, temperature=10000, normalize=False, scale=None
+    ):
+        super().__init__()
+        self.num_pos_feats = num_pos_feats
+        self.temperature = temperature
+        self.normalize = normalize
+        if scale is not None and normalize is False:
+            raise ValueError("normalize should be True if scale is passed")
+        if scale is None:
+            scale = 2 * math.pi
+        self.scale = scale
+    def forward(self, x, mask=None):
+        if mask is None:
+            mask = torch.zeros(
+                (x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool
+            )
+        not_mask = ~mask  # pylint: disable=invalid-unary-operand-type
+        y_embed = not_mask.cumsum(1, dtype=torch.float32)
+        x_embed = not_mask.cumsum(2, dtype=torch.float32)
+        if self.normalize:
+            eps = 1e-6
+            y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
+            x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
+        dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
+        dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
+        pos_x = x_embed[:, :, :, None] / dim_t
+        pos_y = y_embed[:, :, :, None] / dim_t
+        pos_x = torch.stack(
+            (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
+        ).flatten(3)
+        pos_y = torch.stack(
+            (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
+        ).flatten(3)
+        pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
+        return pos
+def _get_activation_fn(activation):
+    """Return an activation function given a string"""
+    if activation == "relu":
+        return F.relu
+    if activation == "gelu":
+        return F.gelu
+    if activation == "glu":
+        return F.glu
+    raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
+class TransformerSALayer(nn.Module):
+    def __init__(
+        self, embed_dim, nhead=8, dim_mlp=2048, dropout=0.0, activation="gelu"
+    ):
+        super().__init__()
+        self.self_attn = nn.MultiheadAttention(embed_dim, nhead, dropout=dropout)
+        # Implementation of Feedforward model - MLP
+        self.linear1 = nn.Linear(embed_dim, dim_mlp)
+        self.dropout = nn.Dropout(dropout)
+        self.linear2 = nn.Linear(dim_mlp, embed_dim)
+        self.norm1 = nn.LayerNorm(embed_dim)
+        self.norm2 = nn.LayerNorm(embed_dim)
+        self.dropout1 = nn.Dropout(dropout)
+        self.dropout2 = nn.Dropout(dropout)
+        self.activation = _get_activation_fn(activation)
+    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
+        return tensor if pos is None else tensor + pos
+    def forward(
+        self,
+        tgt,
+        tgt_mask: Optional[Tensor] = None,
+        tgt_key_padding_mask: Optional[Tensor] = None,
+        query_pos: Optional[Tensor] = None,
+    ):
+        # self attention
+        tgt2 = self.norm1(tgt)
+        q = k = self.with_pos_embed(tgt2, query_pos)
+        tgt2 = self.self_attn(
+            q, k, value=tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
+        )[0]
+        tgt = tgt + self.dropout1(tgt2)
+        # ffn
+        tgt2 = self.norm2(tgt)
+        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
+        tgt = tgt + self.dropout2(tgt2)
+        return tgt
+def normalize(in_channels):
+    return torch.nn.GroupNorm(
+        num_groups=32, num_channels=in_channels, eps=1e-6, affine=True
+    )
+@torch.jit.script  # type: ignore
+def swish(x):
+    return x * torch.sigmoid(x)
+class ResBlock(nn.Module):
+    def __init__(self, in_channels, out_channels=None):
+        super(ResBlock, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = in_channels if out_channels is None else out_channels
+        self.norm1 = normalize(in_channels)
+        self.conv1 = nn.Conv2d(
+            in_channels, out_channels, kernel_size=3, stride=1, padding=1  # type: ignore
+        )
+        self.norm2 = normalize(out_channels)
+        self.conv2 = nn.Conv2d(
+            out_channels, out_channels, kernel_size=3, stride=1, padding=1  # type: ignore
+        )
+        if self.in_channels != self.out_channels:
+            self.conv_out = nn.Conv2d(
+                in_channels, out_channels, kernel_size=1, stride=1, padding=0  # type: ignore
+            )
+    def forward(self, x_in):
+        x = x_in
+        x = self.norm1(x)
+        x = swish(x)
+        x = self.conv1(x)
+        x = self.norm2(x)
+        x = swish(x)
+        x = self.conv2(x)
+        if self.in_channels != self.out_channels:
+            x_in = self.conv_out(x_in)
+        return x + x_in
+class Fuse_sft_block(nn.Module):
+    def __init__(self, in_ch, out_ch):
+        super().__init__()
+        self.encode_enc = ResBlock(2 * in_ch, out_ch)
+        self.scale = nn.Sequential(
+            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
+            nn.LeakyReLU(0.2, True),
+            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
+        )
+        self.shift = nn.Sequential(
+            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
+            nn.LeakyReLU(0.2, True),
+            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
+        )
+    def forward(self, enc_feat, dec_feat, w=1):
+        enc_feat = self.encode_enc(torch.cat([enc_feat, dec_feat], dim=1))
+        scale = self.scale(enc_feat)
+        shift = self.shift(enc_feat)
+        residual = w * (dec_feat * scale + shift)
+        out = dec_feat + residual
+        return out
+class CodeFormer(VQAutoEncoder):
+    def __init__(self, state_dict):
+        dim_embd = 512
+        n_head = 8
+        n_layers = 9
+        codebook_size = 1024
+        latent_size = 256
+        connect_list = ["32", "64", "128", "256"]
+        fix_modules = ["quantize", "generator"]
+        # This is just a guess as I only have one model to look at
+        position_emb = state_dict["position_emb"]
+        dim_embd = position_emb.shape[1]
+        latent_size = position_emb.shape[0]
+        try:
+            n_layers = len(
+                set([x.split(".")[1] for x in state_dict.keys() if "ft_layers" in x])
+            )
+        except:
+            pass
+        codebook_size = state_dict["quantize.embedding.weight"].shape[0]
+        # This is also just another guess
+        n_head_exp = (
+            state_dict["ft_layers.0.self_attn.in_proj_weight"].shape[0] // dim_embd
+        )
+        n_head = 2**n_head_exp
+        in_nc = state_dict["encoder.blocks.0.weight"].shape[1]
+        self.model_arch = "CodeFormer"
+        self.sub_type = "Face SR"
+        self.scale = 8
+        self.in_nc = in_nc
+        self.out_nc = in_nc
+        self.state = state_dict
+        self.supports_fp16 = False
+        self.supports_bf16 = True
+        self.min_size_restriction = 16
+        super(CodeFormer, self).__init__(
+            512, 64, [1, 2, 2, 4, 4, 8], "nearest", 2, [16], codebook_size
+        )
+        if fix_modules is not None:
+            for module in fix_modules:
+                for param in getattr(self, module).parameters():
+                    param.requires_grad = False
+        self.connect_list = connect_list
+        self.n_layers = n_layers
+        self.dim_embd = dim_embd
+        self.dim_mlp = dim_embd * 2
+        self.position_emb = nn.Parameter(torch.zeros(latent_size, self.dim_embd))  # type: ignore
+        self.feat_emb = nn.Linear(256, self.dim_embd)
+        # transformer
+        self.ft_layers = nn.Sequential(
+            *[
+                TransformerSALayer(
+                    embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0
+                )
+                for _ in range(self.n_layers)
+            ]
+        )
+        # logits_predict head
+        self.idx_pred_layer = nn.Sequential(
+            nn.LayerNorm(dim_embd), nn.Linear(dim_embd, codebook_size, bias=False)
+        )
+        self.channels = {
+            "16": 512,
+            "32": 256,
+            "64": 256,
+            "128": 128,
+            "256": 128,
+            "512": 64,
+        }
+        # after second residual block for > 16, before attn layer for ==16
+        self.fuse_encoder_block = {
+            "512": 2,
+            "256": 5,
+            "128": 8,
+            "64": 11,
+            "32": 14,
+            "16": 18,
+        }
+        # after first residual block for > 16, before attn layer for ==16
+        self.fuse_generator_block = {
+            "16": 6,
+            "32": 9,
+            "64": 12,
+            "128": 15,
+            "256": 18,
+            "512": 21,
+        }
+        # fuse_convs_dict
+        self.fuse_convs_dict = nn.ModuleDict()
+        for f_size in self.connect_list:
+            in_ch = self.channels[f_size]
+            self.fuse_convs_dict[f_size] = Fuse_sft_block(in_ch, in_ch)
+        self.load_state_dict(state_dict)
+    def _init_weights(self, module):
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            module.weight.data.normal_(mean=0.0, std=0.02)
+            if isinstance(module, nn.Linear) and module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+    def forward(self, x, weight=0.5, **kwargs):
+        detach_16 = True
+        code_only = False
+        adain = True
+        # ################### Encoder #####################
+        enc_feat_dict = {}
+        out_list = [self.fuse_encoder_block[f_size] for f_size in self.connect_list]
+        for i, block in enumerate(self.encoder.blocks):
+            x = block(x)
+            if i in out_list:
+                enc_feat_dict[str(x.shape[-1])] = x.clone()
+        lq_feat = x
+        # ################# Transformer ###################
+        # quant_feat, codebook_loss, quant_stats = self.quantize(lq_feat)
+        pos_emb = self.position_emb.unsqueeze(1).repeat(1, x.shape[0], 1)
+        # BCHW -> BC(HW) -> (HW)BC
+        feat_emb = self.feat_emb(lq_feat.flatten(2).permute(2, 0, 1))
+        query_emb = feat_emb
+        # Transformer encoder
+        for layer in self.ft_layers:
+            query_emb = layer(query_emb, query_pos=pos_emb)
+        # output logits
+        logits = self.idx_pred_layer(query_emb)  # (hw)bn
+        logits = logits.permute(1, 0, 2)  # (hw)bn -> b(hw)n
+        if code_only:  # for training stage II
+            # logits doesn't need softmax before cross_entropy loss
+            return logits, lq_feat
+        # ################# Quantization ###################
+        # if self.training:
+        #     quant_feat = torch.einsum('btn,nc->btc', [soft_one_hot, self.quantize.embedding.weight])
+        #     # b(hw)c -> bc(hw) -> bchw
+        #     quant_feat = quant_feat.permute(0,2,1).view(lq_feat.shape)
+        # ------------
+        soft_one_hot = F.softmax(logits, dim=2)
+        _, top_idx = torch.topk(soft_one_hot, 1, dim=2)
+        quant_feat = self.quantize.get_codebook_feat(
+            top_idx, shape=[x.shape[0], 16, 16, 256]  # type: ignore
+        )
+        # preserve gradients
+        # quant_feat = lq_feat + (quant_feat - lq_feat).detach()
+        if detach_16:
+            quant_feat = quant_feat.detach()  # for training stage III
+        if adain:
+            quant_feat = adaptive_instance_normalization(quant_feat, lq_feat)
+        # ################## Generator ####################
+        x = quant_feat
+        fuse_list = [self.fuse_generator_block[f_size] for f_size in self.connect_list]
+        for i, block in enumerate(self.generator.blocks):
+            x = block(x)
+            if i in fuse_list:  # fuse after i-th block
+                f_size = str(x.shape[-1])
+                if weight > 0:
+                    x = self.fuse_convs_dict[f_size](
+                        enc_feat_dict[f_size].detach(), x, weight
+                    )
+        out = x
+        # logits doesn't need softmax before cross_entropy loss
+        # return out, logits, lq_feat
+        return out, logits

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/fused_act.py ADDED Viewed

	@@ -0,0 +1,81 @@

+# pylint: skip-file
+# type: ignore
+# modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501
+import torch
+from torch import nn
+from torch.autograd import Function
+fused_act_ext = None  # presumably a handle to a compiled fused bias/activation extension (TODO confirm); it is None here, so the Functions below fail on `fused_act_ext.fused_bias_act` unless something assigns a real module
+class FusedLeakyReLUFunctionBackward(Function):
+    @staticmethod
+    def forward(ctx, grad_output, out, negative_slope, scale):
+        ctx.save_for_backward(out)
+        ctx.negative_slope = negative_slope
+        ctx.scale = scale
+        empty = grad_output.new_empty(0)
+        grad_input = fused_act_ext.fused_bias_act(
+            grad_output, empty, out, 3, 1, negative_slope, scale
+        )
+        dim = [0]
+        if grad_input.ndim > 2:
+            dim += list(range(2, grad_input.ndim))
+        grad_bias = grad_input.sum(dim).detach()
+        return grad_input, grad_bias
+    @staticmethod
+    def backward(ctx, gradgrad_input, gradgrad_bias):
+        (out,) = ctx.saved_tensors
+        gradgrad_out = fused_act_ext.fused_bias_act(
+            gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale
+        )
+        return gradgrad_out, None, None, None
+class FusedLeakyReLUFunction(Function):
+    @staticmethod
+    def forward(ctx, input, bias, negative_slope, scale):
+        empty = input.new_empty(0)
+        out = fused_act_ext.fused_bias_act(
+            input, bias, empty, 3, 0, negative_slope, scale
+        )
+        ctx.save_for_backward(out)
+        ctx.negative_slope = negative_slope
+        ctx.scale = scale
+        return out
+    @staticmethod
+    def backward(ctx, grad_output):
+        (out,) = ctx.saved_tensors
+        grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply(
+            grad_output, out, ctx.negative_slope, ctx.scale
+        )
+        return grad_input, grad_bias, None, None
+class FusedLeakyReLU(nn.Module):
+    def __init__(self, channel, negative_slope=0.2, scale=2**0.5):
+        super().__init__()
+        self.bias = nn.Parameter(torch.zeros(channel))
+        self.negative_slope = negative_slope
+        self.scale = scale
+    def forward(self, input):
+        return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)
+def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5):
+    return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale)

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/gfpgan_bilinear_arch.py ADDED Viewed

	@@ -0,0 +1,389 @@

+# pylint: skip-file
+# type: ignore
+import math
+import random
+import torch
+from torch import nn
+from .gfpganv1_arch import ResUpBlock
+from .stylegan2_bilinear_arch import (
+    ConvLayer,
+    EqualConv2d,
+    EqualLinear,
+    ResBlock,
+    ScaledLeakyReLU,
+    StyleGAN2GeneratorBilinear,
+)
+class StyleGAN2GeneratorBilinearSFT(StyleGAN2GeneratorBilinear):
+    """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform).
+    It is the bilinear version. It does not use the complicated UpFirDnSmooth function that is not friendly for
+    deployment. It can be easily converted to the clean version: StyleGAN2GeneratorCSFT.
+    Args:
+        out_size (int): The spatial size of outputs.
+        num_style_feat (int): Channel number of style features. Default: 512.
+        num_mlp (int): Layer number of MLP style layers. Default: 8.
+        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
+        lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01.
+        narrow (float): The narrow ratio for channels. Default: 1.
+        sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
+    """
+    def __init__(
+        self,
+        out_size,
+        num_style_feat=512,
+        num_mlp=8,
+        channel_multiplier=2,
+        lr_mlp=0.01,
+        narrow=1,
+        sft_half=False,
+    ):
+        super(StyleGAN2GeneratorBilinearSFT, self).__init__(
+            out_size,
+            num_style_feat=num_style_feat,
+            num_mlp=num_mlp,
+            channel_multiplier=channel_multiplier,
+            lr_mlp=lr_mlp,
+            narrow=narrow,
+        )
+        self.sft_half = sft_half
+    def forward(
+        self,
+        styles,
+        conditions,
+        input_is_latent=False,
+        noise=None,
+        randomize_noise=True,
+        truncation=1,
+        truncation_latent=None,
+        inject_index=None,
+        return_latents=False,
+    ):
+        """Forward function for StyleGAN2GeneratorBilinearSFT.
+        Args:
+            styles (list[Tensor]): Sample codes of styles.
+            conditions (list[Tensor]): SFT conditions to generators.
+            input_is_latent (bool): Whether input is latent style. Default: False.
+            noise (Tensor | None): Input noise or None. Default: None.
+            randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
+            truncation (float): The truncation ratio. Default: 1.
+            truncation_latent (Tensor | None): The truncation latent tensor. Default: None.
+            inject_index (int | None): The injection index for mixing noise. Default: None.
+            return_latents (bool): Whether to return style latents. Default: False.
+        """
+        # style codes -> latents with Style MLP layer
+        if not input_is_latent:
+            styles = [self.style_mlp(s) for s in styles]
+        # noises
+        if noise is None:
+            if randomize_noise:
+                noise = [None] * self.num_layers  # for each style conv layer
+            else:  # use the stored noise
+                noise = [
+                    getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
+                ]
+        # style truncation
+        if truncation < 1:
+            style_truncation = []
+            for style in styles:
+                style_truncation.append(
+                    truncation_latent + truncation * (style - truncation_latent)
+                )
+            styles = style_truncation
+        # get style latents with injection
+        if len(styles) == 1:
+            inject_index = self.num_latent
+            if styles[0].ndim < 3:
+                # repeat latent code for all the layers
+                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            else:  # used for encoder with different latent code for each layer
+                latent = styles[0]
+        elif len(styles) == 2:  # mixing noises
+            if inject_index is None:
+                inject_index = random.randint(1, self.num_latent - 1)
+            latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            latent2 = (
+                styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
+            )
+            latent = torch.cat([latent1, latent2], 1)
+        # main generation
+        out = self.constant_input(latent.shape[0])
+        out = self.style_conv1(out, latent[:, 0], noise=noise[0])
+        skip = self.to_rgb1(out, latent[:, 1])
+        i = 1
+        for conv1, conv2, noise1, noise2, to_rgb in zip(
+            self.style_convs[::2],
+            self.style_convs[1::2],
+            noise[1::2],
+            noise[2::2],
+            self.to_rgbs,
+        ):
+            out = conv1(out, latent[:, i], noise=noise1)
+            # the conditions may have fewer levels
+            if i < len(conditions):
+                # SFT part to combine the conditions
+                if self.sft_half:  # only apply SFT to half of the channels
+                    out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1)
+                    out_sft = out_sft * conditions[i - 1] + conditions[i]
+                    out = torch.cat([out_same, out_sft], dim=1)
+                else:  # apply SFT to all the channels
+                    out = out * conditions[i - 1] + conditions[i]
+            out = conv2(out, latent[:, i + 1], noise=noise2)
+            skip = to_rgb(out, latent[:, i + 2], skip)  # feature back to the rgb space
+            i += 2
+        image = skip
+        if return_latents:
+            return image, latent
+        else:
+            return image, None
+class GFPGANBilinear(nn.Module):
+    """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT.
+    It is the bilinear version and it does not use the complicated UpFirDnSmooth function that is not friendly for
+    deployment. It can be easily converted to the clean version: GFPGANv1Clean.
+    Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior.
+    Args:
+        out_size (int): The spatial size of outputs.
+        num_style_feat (int): Channel number of style features. Default: 512.
+        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
+        decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None.
+        fix_decoder (bool): Whether to fix the decoder. Default: True.
+        num_mlp (int): Layer number of MLP style layers. Default: 8.
+        lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01.
+        input_is_latent (bool): Whether input is latent style. Default: False.
+        different_w (bool): Whether to use different latent w for different layers. Default: False.
+        narrow (float): The narrow ratio for channels. Default: 1.
+        sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
+    """
+    def __init__(
+        self,
+        out_size,
+        num_style_feat=512,
+        channel_multiplier=1,
+        decoder_load_path=None,
+        fix_decoder=True,
+        # for stylegan decoder
+        num_mlp=8,
+        lr_mlp=0.01,
+        input_is_latent=False,
+        different_w=False,
+        narrow=1,
+        sft_half=False,
+    ):
+        super(GFPGANBilinear, self).__init__()
+        self.input_is_latent = input_is_latent
+        self.different_w = different_w
+        self.num_style_feat = num_style_feat
+        self.min_size_restriction = 512
+        unet_narrow = narrow * 0.5  # by default, use a half of input channels
+        channels = {
+            "4": int(512 * unet_narrow),
+            "8": int(512 * unet_narrow),
+            "16": int(512 * unet_narrow),
+            "32": int(512 * unet_narrow),
+            "64": int(256 * channel_multiplier * unet_narrow),
+            "128": int(128 * channel_multiplier * unet_narrow),
+            "256": int(64 * channel_multiplier * unet_narrow),
+            "512": int(32 * channel_multiplier * unet_narrow),
+            "1024": int(16 * channel_multiplier * unet_narrow),
+        }
+        self.log_size = int(math.log(out_size, 2))
+        first_out_size = 2 ** (int(math.log(out_size, 2)))
+        self.conv_body_first = ConvLayer(
+            3, channels[f"{first_out_size}"], 1, bias=True, activate=True
+        )
+        # downsample
+        in_channels = channels[f"{first_out_size}"]
+        self.conv_body_down = nn.ModuleList()
+        for i in range(self.log_size, 2, -1):
+            out_channels = channels[f"{2**(i - 1)}"]
+            self.conv_body_down.append(ResBlock(in_channels, out_channels))
+            in_channels = out_channels
+        self.final_conv = ConvLayer(
+            in_channels, channels["4"], 3, bias=True, activate=True
+        )
+        # upsample
+        in_channels = channels["4"]
+        self.conv_body_up = nn.ModuleList()
+        for i in range(3, self.log_size + 1):
+            out_channels = channels[f"{2**i}"]
+            self.conv_body_up.append(ResUpBlock(in_channels, out_channels))
+            in_channels = out_channels
+        # to RGB
+        self.toRGB = nn.ModuleList()
+        for i in range(3, self.log_size + 1):
+            self.toRGB.append(
+                EqualConv2d(
+                    channels[f"{2**i}"],
+                    3,
+                    1,
+                    stride=1,
+                    padding=0,
+                    bias=True,
+                    bias_init_val=0,
+                )
+            )
+        if different_w:
+            linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat
+        else:
+            linear_out_channel = num_style_feat
+        self.final_linear = EqualLinear(
+            channels["4"] * 4 * 4,
+            linear_out_channel,
+            bias=True,
+            bias_init_val=0,
+            lr_mul=1,
+            activation=None,
+        )
+        # the decoder: stylegan2 generator with SFT modulations
+        self.stylegan_decoder = StyleGAN2GeneratorBilinearSFT(
+            out_size=out_size,
+            num_style_feat=num_style_feat,
+            num_mlp=num_mlp,
+            channel_multiplier=channel_multiplier,
+            lr_mlp=lr_mlp,
+            narrow=narrow,
+            sft_half=sft_half,
+        )
+        # load pre-trained stylegan2 model if necessary
+        if decoder_load_path:
+            self.stylegan_decoder.load_state_dict(
+                torch.load(
+                    decoder_load_path, map_location=lambda storage, loc: storage
+                )["params_ema"]
+            )
+        # fix decoder without updating params
+        if fix_decoder:
+            for _, param in self.stylegan_decoder.named_parameters():
+                param.requires_grad = False
+        # for SFT modulations (scale and shift)
+        self.condition_scale = nn.ModuleList()
+        self.condition_shift = nn.ModuleList()
+        for i in range(3, self.log_size + 1):
+            out_channels = channels[f"{2**i}"]
+            if sft_half:
+                sft_out_channels = out_channels
+            else:
+                sft_out_channels = out_channels * 2
+            self.condition_scale.append(
+                nn.Sequential(
+                    EqualConv2d(
+                        out_channels,
+                        out_channels,
+                        3,
+                        stride=1,
+                        padding=1,
+                        bias=True,
+                        bias_init_val=0,
+                    ),
+                    ScaledLeakyReLU(0.2),
+                    EqualConv2d(
+                        out_channels,
+                        sft_out_channels,
+                        3,
+                        stride=1,
+                        padding=1,
+                        bias=True,
+                        bias_init_val=1,
+                    ),
+                )
+            )
+            self.condition_shift.append(
+                nn.Sequential(
+                    EqualConv2d(
+                        out_channels,
+                        out_channels,
+                        3,
+                        stride=1,
+                        padding=1,
+                        bias=True,
+                        bias_init_val=0,
+                    ),
+                    ScaledLeakyReLU(0.2),
+                    EqualConv2d(
+                        out_channels,
+                        sft_out_channels,
+                        3,
+                        stride=1,
+                        padding=1,
+                        bias=True,
+                        bias_init_val=0,
+                    ),
+                )
+            )
+    def forward(self, x, return_latents=False, return_rgb=True, randomize_noise=True):
+        """Forward function for GFPGANBilinear.
+        Args:
+            x (Tensor): Input images.
+            return_latents (bool): Whether to return style latents. Default: False.
+            return_rgb (bool): Whether return intermediate rgb images. Default: True.
+            randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
+        """
+        conditions = []
+        unet_skips = []
+        out_rgbs = []
+        # encoder
+        feat = self.conv_body_first(x)
+        for i in range(self.log_size - 2):
+            feat = self.conv_body_down[i](feat)
+            unet_skips.insert(0, feat)
+        feat = self.final_conv(feat)
+        # style code
+        style_code = self.final_linear(feat.view(feat.size(0), -1))
+        if self.different_w:
+            style_code = style_code.view(style_code.size(0), -1, self.num_style_feat)
+        # decode
+        for i in range(self.log_size - 2):
+            # add unet skip
+            feat = feat + unet_skips[i]
+            # ResUpLayer
+            feat = self.conv_body_up[i](feat)
+            # generate scale and shift for SFT layers
+            scale = self.condition_scale[i](feat)
+            conditions.append(scale.clone())
+            shift = self.condition_shift[i](feat)
+            conditions.append(shift.clone())
+            # generate rgb images
+            if return_rgb:
+                out_rgbs.append(self.toRGB[i](feat))
+        # decoder
+        image, _ = self.stylegan_decoder(
+            [style_code],
+            conditions,
+            return_latents=return_latents,
+            input_is_latent=self.input_is_latent,
+            randomize_noise=randomize_noise,
+        )
+        return image, out_rgbs

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/gfpganv1_arch.py ADDED Viewed

	@@ -0,0 +1,566 @@

+# pylint: skip-file
+# type: ignore
+import math
+import random
+import torch
+from torch import nn
+from torch.nn import functional as F
+from .fused_act import FusedLeakyReLU
+from .stylegan2_arch import (
+    ConvLayer,
+    EqualConv2d,
+    EqualLinear,
+    ResBlock,
+    ScaledLeakyReLU,
+    StyleGAN2Generator,
+)
+class StyleGAN2GeneratorSFT(StyleGAN2Generator):
+    """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform).
+    Args:
+        out_size (int): The spatial size of outputs.
+        num_style_feat (int): Channel number of style features. Default: 512.
+        num_mlp (int): Layer number of MLP style layers. Default: 8.
+        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
+        resample_kernel (list[int]): A list indicating the 1D resample kernel magnitude. A cross production will be
+            applied to extent 1D resample kernel to 2D resample kernel. Default: (1, 3, 3, 1).
+        lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01.
+        narrow (float): The narrow ratio for channels. Default: 1.
+        sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
+    """
+    def __init__(
+        self,
+        out_size,
+        num_style_feat=512,
+        num_mlp=8,
+        channel_multiplier=2,
+        resample_kernel=(1, 3, 3, 1),
+        lr_mlp=0.01,
+        narrow=1,
+        sft_half=False,
+    ):
+        super(StyleGAN2GeneratorSFT, self).__init__(
+            out_size,
+            num_style_feat=num_style_feat,
+            num_mlp=num_mlp,
+            channel_multiplier=channel_multiplier,
+            resample_kernel=resample_kernel,
+            lr_mlp=lr_mlp,
+            narrow=narrow,
+        )
+        self.sft_half = sft_half
+    def forward(
+        self,
+        styles,
+        conditions,
+        input_is_latent=False,
+        noise=None,
+        randomize_noise=True,
+        truncation=1,
+        truncation_latent=None,
+        inject_index=None,
+        return_latents=False,
+    ):
+        """Forward function for StyleGAN2GeneratorSFT.
+        Args:
+            styles (list[Tensor]): Sample codes of styles.
+            conditions (list[Tensor]): SFT conditions to generators.
+            input_is_latent (bool): Whether input is latent style. Default: False.
+            noise (Tensor | None): Input noise or None. Default: None.
+            randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
+            truncation (float): The truncation ratio. Default: 1.
+            truncation_latent (Tensor | None): The truncation latent tensor. Default: None.
+            inject_index (int | None): The injection index for mixing noise. Default: None.
+            return_latents (bool): Whether to return style latents. Default: False.
+        """
+        # style codes -> latents with Style MLP layer
+        if not input_is_latent:
+            styles = [self.style_mlp(s) for s in styles]
+        # noises
+        if noise is None:
+            if randomize_noise:
+                noise = [None] * self.num_layers  # for each style conv layer
+            else:  # use the stored noise
+                noise = [
+                    getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
+                ]
+        # style truncation
+        if truncation < 1:
+            style_truncation = []
+            for style in styles:
+                style_truncation.append(
+                    truncation_latent + truncation * (style - truncation_latent)
+                )
+            styles = style_truncation
+        # get style latents with injection
+        if len(styles) == 1:
+            inject_index = self.num_latent
+            if styles[0].ndim < 3:
+                # repeat latent code for all the layers
+                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            else:  # used for encoder with different latent code for each layer
+                latent = styles[0]
+        elif len(styles) == 2:  # mixing noises
+            if inject_index is None:
+                inject_index = random.randint(1, self.num_latent - 1)
+            latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            latent2 = (
+                styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
+            )
+            latent = torch.cat([latent1, latent2], 1)
+        # main generation
+        out = self.constant_input(latent.shape[0])
+        out = self.style_conv1(out, latent[:, 0], noise=noise[0])
+        skip = self.to_rgb1(out, latent[:, 1])
+        i = 1
+        for conv1, conv2, noise1, noise2, to_rgb in zip(
+            self.style_convs[::2],
+            self.style_convs[1::2],
+            noise[1::2],
+            noise[2::2],
+            self.to_rgbs,
+        ):
+            out = conv1(out, latent[:, i], noise=noise1)
+            # the conditions may have fewer levels
+            if i < len(conditions):
+                # SFT part to combine the conditions
+                if self.sft_half:  # only apply SFT to half of the channels
+                    out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1)
+                    out_sft = out_sft * conditions[i - 1] + conditions[i]
+                    out = torch.cat([out_same, out_sft], dim=1)
+                else:  # apply SFT to all the channels
+                    out = out * conditions[i - 1] + conditions[i]
+            out = conv2(out, latent[:, i + 1], noise=noise2)
+            skip = to_rgb(out, latent[:, i + 2], skip)  # feature back to the rgb space
+            i += 2
+        image = skip
+        if return_latents:
+            return image, latent
+        else:
+            return image, None
+class ConvUpLayer(nn.Module):
+    """Convolutional upsampling layer. It uses bilinear upsampler + Conv.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Size of the convolving kernel.
+        stride (int): Stride of the convolution. Default: 1
+        padding (int): Zero-padding added to both sides of the input. Default: 0.
+        bias (bool): If ``True``, adds a learnable bias to the output. Default: ``True``.
+        bias_init_val (float): Bias initialized value. Default: 0.
+        activate (bool): Whether use activateion. Default: True.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        bias=True,
+        bias_init_val=0,
+        activate=True,
+    ):
+        super(ConvUpLayer, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = padding
+        # self.scale is used to scale the convolution weights, which is related to the common initializations.
+        self.scale = 1 / math.sqrt(in_channels * kernel_size**2)
+        self.weight = nn.Parameter(
+            torch.randn(out_channels, in_channels, kernel_size, kernel_size)
+        )
+        if bias and not activate:
+            self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val))
+        else:
+            self.register_parameter("bias", None)
+        # activation
+        if activate:
+            if bias:
+                self.activation = FusedLeakyReLU(out_channels)
+            else:
+                self.activation = ScaledLeakyReLU(0.2)
+        else:
+            self.activation = None
+    def forward(self, x):
+        # bilinear upsample
+        out = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False)
+        # conv
+        out = F.conv2d(
+            out,
+            self.weight * self.scale,
+            bias=self.bias,
+            stride=self.stride,
+            padding=self.padding,
+        )
+        # activation
+        if self.activation is not None:
+            out = self.activation(out)
+        return out
+class ResUpBlock(nn.Module):
+    """Residual block with upsampling.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+    """
+    def __init__(self, in_channels, out_channels):
+        super(ResUpBlock, self).__init__()
+        self.conv1 = ConvLayer(in_channels, in_channels, 3, bias=True, activate=True)
+        self.conv2 = ConvUpLayer(
+            in_channels, out_channels, 3, stride=1, padding=1, bias=True, activate=True
+        )
+        self.skip = ConvUpLayer(
+            in_channels, out_channels, 1, bias=False, activate=False
+        )
+    def forward(self, x):
+        out = self.conv1(x)
+        out = self.conv2(out)
+        skip = self.skip(x)
+        out = (out + skip) / math.sqrt(2)
+        return out
+class GFPGANv1(nn.Module):
+    """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT.
+    Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior.
+    Args:
+        out_size (int): The spatial size of outputs.
+        num_style_feat (int): Channel number of style features. Default: 512.
+        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
+        resample_kernel (list[int]): A list indicating the 1D resample kernel magnitude. A cross production will be
+            applied to extent 1D resample kernel to 2D resample kernel. Default: (1, 3, 3, 1).
+        decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None.
+        fix_decoder (bool): Whether to fix the decoder. Default: True.
+        num_mlp (int): Layer number of MLP style layers. Default: 8.
+        lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01.
+        input_is_latent (bool): Whether input is latent style. Default: False.
+        different_w (bool): Whether to use different latent w for different layers. Default: False.
+        narrow (float): The narrow ratio for channels. Default: 1.
+        sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
+    """
+    def __init__(
+        self,
+        out_size,
+        num_style_feat=512,
+        channel_multiplier=1,
+        resample_kernel=(1, 3, 3, 1),
+        decoder_load_path=None,
+        fix_decoder=True,
+        # for stylegan decoder
+        num_mlp=8,
+        lr_mlp=0.01,
+        input_is_latent=False,
+        different_w=False,
+        narrow=1,
+        sft_half=False,
+    ):
+        super(GFPGANv1, self).__init__()
+        self.input_is_latent = input_is_latent
+        self.different_w = different_w
+        self.num_style_feat = num_style_feat
+        unet_narrow = narrow * 0.5  # by default, use a half of input channels
+        channels = {
+            "4": int(512 * unet_narrow),
+            "8": int(512 * unet_narrow),
+            "16": int(512 * unet_narrow),
+            "32": int(512 * unet_narrow),
+            "64": int(256 * channel_multiplier * unet_narrow),
+            "128": int(128 * channel_multiplier * unet_narrow),
+            "256": int(64 * channel_multiplier * unet_narrow),
+            "512": int(32 * channel_multiplier * unet_narrow),
+            "1024": int(16 * channel_multiplier * unet_narrow),
+        }
+        self.log_size = int(math.log(out_size, 2))
+        first_out_size = 2 ** (int(math.log(out_size, 2)))
+        self.conv_body_first = ConvLayer(
+            3, channels[f"{first_out_size}"], 1, bias=True, activate=True
+        )
+        # downsample
+        in_channels = channels[f"{first_out_size}"]
+        self.conv_body_down = nn.ModuleList()
+        for i in range(self.log_size, 2, -1):
+            out_channels = channels[f"{2**(i - 1)}"]
+            self.conv_body_down.append(
+                ResBlock(in_channels, out_channels, resample_kernel)
+            )
+            in_channels = out_channels
+        self.final_conv = ConvLayer(
+            in_channels, channels["4"], 3, bias=True, activate=True
+        )
+        # upsample
+        in_channels = channels["4"]
+        self.conv_body_up = nn.ModuleList()
+        for i in range(3, self.log_size + 1):
+            out_channels = channels[f"{2**i}"]
+            self.conv_body_up.append(ResUpBlock(in_channels, out_channels))
+            in_channels = out_channels
+        # to RGB
+        self.toRGB = nn.ModuleList()
+        for i in range(3, self.log_size + 1):
+            self.toRGB.append(
+                EqualConv2d(
+                    channels[f"{2**i}"],
+                    3,
+                    1,
+                    stride=1,
+                    padding=0,
+                    bias=True,
+                    bias_init_val=0,
+                )
+            )
+        if different_w:
+            linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat
+        else:
+            linear_out_channel = num_style_feat
+        self.final_linear = EqualLinear(
+            channels["4"] * 4 * 4,
+            linear_out_channel,
+            bias=True,
+            bias_init_val=0,
+            lr_mul=1,
+            activation=None,
+        )
+        # the decoder: stylegan2 generator with SFT modulations
+        self.stylegan_decoder = StyleGAN2GeneratorSFT(
+            out_size=out_size,
+            num_style_feat=num_style_feat,
+            num_mlp=num_mlp,
+            channel_multiplier=channel_multiplier,
+            resample_kernel=resample_kernel,
+            lr_mlp=lr_mlp,
+            narrow=narrow,
+            sft_half=sft_half,
+        )
+        # load pre-trained stylegan2 model if necessary
+        if decoder_load_path:
+            self.stylegan_decoder.load_state_dict(
+                torch.load(
+                    decoder_load_path, map_location=lambda storage, loc: storage
+                )["params_ema"]
+            )
+        # fix decoder without updating params
+        if fix_decoder:
+            for _, param in self.stylegan_decoder.named_parameters():
+                param.requires_grad = False
+        # for SFT modulations (scale and shift)
+        self.condition_scale = nn.ModuleList()
+        self.condition_shift = nn.ModuleList()
+        for i in range(3, self.log_size + 1):
+            out_channels = channels[f"{2**i}"]
+            if sft_half:
+                sft_out_channels = out_channels
+            else:
+                sft_out_channels = out_channels * 2
+            self.condition_scale.append(
+                nn.Sequential(
+                    EqualConv2d(
+                        out_channels,
+                        out_channels,
+                        3,
+                        stride=1,
+                        padding=1,
+                        bias=True,
+                        bias_init_val=0,
+                    ),
+                    ScaledLeakyReLU(0.2),
+                    EqualConv2d(
+                        out_channels,
+                        sft_out_channels,
+                        3,
+                        stride=1,
+                        padding=1,
+                        bias=True,
+                        bias_init_val=1,
+                    ),
+                )
+            )
+            self.condition_shift.append(
+                nn.Sequential(
+                    EqualConv2d(
+                        out_channels,
+                        out_channels,
+                        3,
+                        stride=1,
+                        padding=1,
+                        bias=True,
+                        bias_init_val=0,
+                    ),
+                    ScaledLeakyReLU(0.2),
+                    EqualConv2d(
+                        out_channels,
+                        sft_out_channels,
+                        3,
+                        stride=1,
+                        padding=1,
+                        bias=True,
+                        bias_init_val=0,
+                    ),
+                )
+            )
+    def forward(
+        self, x, return_latents=False, return_rgb=True, randomize_noise=True, **kwargs
+    ):
+        """Forward function for GFPGANv1.
+        Args:
+            x (Tensor): Input images.
+            return_latents (bool): Whether to return style latents. Default: False.
+            return_rgb (bool): Whether return intermediate rgb images. Default: True.
+            randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
+        """
+        conditions = []
+        unet_skips = []
+        out_rgbs = []
+        # encoder
+        feat = self.conv_body_first(x)
+        for i in range(self.log_size - 2):
+            feat = self.conv_body_down[i](feat)
+            unet_skips.insert(0, feat)
+        feat = self.final_conv(feat)
+        # style code
+        style_code = self.final_linear(feat.view(feat.size(0), -1))
+        if self.different_w:
+            style_code = style_code.view(style_code.size(0), -1, self.num_style_feat)
+        # decode
+        for i in range(self.log_size - 2):
+            # add unet skip
+            feat = feat + unet_skips[i]
+            # ResUpLayer
+            feat = self.conv_body_up[i](feat)
+            # generate scale and shift for SFT layers
+            scale = self.condition_scale[i](feat)
+            conditions.append(scale.clone())
+            shift = self.condition_shift[i](feat)
+            conditions.append(shift.clone())
+            # generate rgb images
+            if return_rgb:
+                out_rgbs.append(self.toRGB[i](feat))
+        # decoder
+        image, _ = self.stylegan_decoder(
+            [style_code],
+            conditions,
+            return_latents=return_latents,
+            input_is_latent=self.input_is_latent,
+            randomize_noise=randomize_noise,
+        )
+        return image, out_rgbs
+class FacialComponentDiscriminator(nn.Module):
+    """Facial component (eyes, mouth, noise) discriminator used in GFPGAN."""
+    def __init__(self):
+        super(FacialComponentDiscriminator, self).__init__()
+        # It now uses a VGG-style architectrue with fixed model size
+        self.conv1 = ConvLayer(
+            3,
+            64,
+            3,
+            downsample=False,
+            resample_kernel=(1, 3, 3, 1),
+            bias=True,
+            activate=True,
+        )
+        self.conv2 = ConvLayer(
+            64,
+            128,
+            3,
+            downsample=True,
+            resample_kernel=(1, 3, 3, 1),
+            bias=True,
+            activate=True,
+        )
+        self.conv3 = ConvLayer(
+            128,
+            128,
+            3,
+            downsample=False,
+            resample_kernel=(1, 3, 3, 1),
+            bias=True,
+            activate=True,
+        )
+        self.conv4 = ConvLayer(
+            128,
+            256,
+            3,
+            downsample=True,
+            resample_kernel=(1, 3, 3, 1),
+            bias=True,
+            activate=True,
+        )
+        self.conv5 = ConvLayer(
+            256,
+            256,
+            3,
+            downsample=False,
+            resample_kernel=(1, 3, 3, 1),
+            bias=True,
+            activate=True,
+        )
+        self.final_conv = ConvLayer(256, 1, 3, bias=True, activate=False)
+    def forward(self, x, return_feats=False, **kwargs):
+        """Forward function for FacialComponentDiscriminator.
+        Args:
+            x (Tensor): Input images.
+            return_feats (bool): Whether to return intermediate features. Default: False.
+        """
+        feat = self.conv1(x)
+        feat = self.conv3(self.conv2(feat))
+        rlt_feats = []
+        if return_feats:
+            rlt_feats.append(feat.clone())
+        feat = self.conv5(self.conv4(feat))
+        if return_feats:
+            rlt_feats.append(feat.clone())
+        out = self.final_conv(feat)
+        if return_feats:
+            return out, rlt_feats
+        else:
+            return out, None

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/gfpganv1_clean_arch.py ADDED Viewed

	@@ -0,0 +1,370 @@

+# pylint: skip-file
+# type: ignore
+import math
+import random
+import torch
+from torch import nn
+from torch.nn import functional as F
+from .stylegan2_clean_arch import StyleGAN2GeneratorClean
+class StyleGAN2GeneratorCSFT(StyleGAN2GeneratorClean):
+    """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform).
+    It is the clean version without custom compiled CUDA extensions used in StyleGAN2.
+    Args:
+        out_size (int): The spatial size of outputs.
+        num_style_feat (int): Channel number of style features. Default: 512.
+        num_mlp (int): Layer number of MLP style layers. Default: 8.
+        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
+        narrow (float): The narrow ratio for channels. Default: 1.
+        sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
+    """
+    def __init__(
+        self,
+        out_size,
+        num_style_feat=512,
+        num_mlp=8,
+        channel_multiplier=2,
+        narrow=1,
+        sft_half=False,
+    ):
+        super(StyleGAN2GeneratorCSFT, self).__init__(
+            out_size,
+            num_style_feat=num_style_feat,
+            num_mlp=num_mlp,
+            channel_multiplier=channel_multiplier,
+            narrow=narrow,
+        )
+        self.sft_half = sft_half
+    def forward(
+        self,
+        styles,
+        conditions,
+        input_is_latent=False,
+        noise=None,
+        randomize_noise=True,
+        truncation=1,
+        truncation_latent=None,
+        inject_index=None,
+        return_latents=False,
+    ):
+        """Forward function for StyleGAN2GeneratorCSFT.
+        Args:
+            styles (list[Tensor]): Sample codes of styles.
+            conditions (list[Tensor]): SFT conditions to generators.
+            input_is_latent (bool): Whether input is latent style. Default: False.
+            noise (Tensor | None): Input noise or None. Default: None.
+            randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
+            truncation (float): The truncation ratio. Default: 1.
+            truncation_latent (Tensor | None): The truncation latent tensor. Default: None.
+            inject_index (int | None): The injection index for mixing noise. Default: None.
+            return_latents (bool): Whether to return style latents. Default: False.
+        """
+        # style codes -> latents with Style MLP layer
+        if not input_is_latent:
+            styles = [self.style_mlp(s) for s in styles]
+        # noises
+        if noise is None:
+            if randomize_noise:
+                noise = [None] * self.num_layers  # for each style conv layer
+            else:  # use the stored noise
+                noise = [
+                    getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
+                ]
+        # style truncation
+        if truncation < 1:
+            style_truncation = []
+            for style in styles:
+                style_truncation.append(
+                    truncation_latent + truncation * (style - truncation_latent)
+                )
+            styles = style_truncation
+        # get style latents with injection
+        if len(styles) == 1:
+            inject_index = self.num_latent
+            if styles[0].ndim < 3:
+                # repeat latent code for all the layers
+                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            else:  # used for encoder with different latent code for each layer
+                latent = styles[0]
+        elif len(styles) == 2:  # mixing noises
+            if inject_index is None:
+                inject_index = random.randint(1, self.num_latent - 1)
+            latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            latent2 = (
+                styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
+            )
+            latent = torch.cat([latent1, latent2], 1)
+        # main generation
+        out = self.constant_input(latent.shape[0])
+        out = self.style_conv1(out, latent[:, 0], noise=noise[0])
+        skip = self.to_rgb1(out, latent[:, 1])
+        i = 1
+        for conv1, conv2, noise1, noise2, to_rgb in zip(
+            self.style_convs[::2],
+            self.style_convs[1::2],
+            noise[1::2],
+            noise[2::2],
+            self.to_rgbs,
+        ):
+            out = conv1(out, latent[:, i], noise=noise1)
+            # the conditions may have fewer levels
+            if i < len(conditions):
+                # SFT part to combine the conditions
+                if self.sft_half:  # only apply SFT to half of the channels
+                    out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1)
+                    out_sft = out_sft * conditions[i - 1] + conditions[i]
+                    out = torch.cat([out_same, out_sft], dim=1)
+                else:  # apply SFT to all the channels
+                    out = out * conditions[i - 1] + conditions[i]
+            out = conv2(out, latent[:, i + 1], noise=noise2)
+            skip = to_rgb(out, latent[:, i + 2], skip)  # feature back to the rgb space
+            i += 2
+        image = skip
+        if return_latents:
+            return image, latent
+        else:
+            return image, None
+class ResBlock(nn.Module):
+    """Residual block with bilinear upsampling/downsampling.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        mode (str): Upsampling/downsampling mode. Options: down | up. Default: down.
+    """
+    def __init__(self, in_channels, out_channels, mode="down"):
+        super(ResBlock, self).__init__()
+        self.conv1 = nn.Conv2d(in_channels, in_channels, 3, 1, 1)
+        self.conv2 = nn.Conv2d(in_channels, out_channels, 3, 1, 1)
+        self.skip = nn.Conv2d(in_channels, out_channels, 1, bias=False)
+        if mode == "down":
+            self.scale_factor = 0.5
+        elif mode == "up":
+            self.scale_factor = 2
+    def forward(self, x):
+        out = F.leaky_relu_(self.conv1(x), negative_slope=0.2)
+        # upsample/downsample
+        out = F.interpolate(
+            out, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
+        )
+        out = F.leaky_relu_(self.conv2(out), negative_slope=0.2)
+        # skip
+        x = F.interpolate(
+            x, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
+        )
+        skip = self.skip(x)
+        out = out + skip
+        return out
+class GFPGANv1Clean(nn.Module):
+    """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT.
+    It is the clean version without custom compiled CUDA extensions used in StyleGAN2.
+    Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior.
+    In this variant the hyper-parameters are NOT constructor arguments: ``__init__``
+    assigns them as local constants and only takes the checkpoint to load.
+    Args:
+        state_dict (dict): Parameters handed to ``load_state_dict`` after all
+            sub-modules have been created. Also kept on ``self.state``.
+    Hard-coded configuration (values assigned at the top of ``__init__``):
+        out_size (int): The spatial size of outputs. Fixed to 512.
+        num_style_feat (int): Channel number of style features. Fixed to 512.
+        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Fixed to 2.
+        decoder_load_path (str): The path to the pre-trained decoder model. Fixed to None,
+            so the separate decoder-loading branch is never taken.
+        fix_decoder (bool): Whether to fix the decoder. Fixed to False.
+        num_mlp (int): Layer number of MLP style layers. Fixed to 8.
+        input_is_latent (bool): Whether input is latent style. Fixed to True.
+        different_w (bool): Whether to use different latent w for different layers. Fixed to True.
+        narrow (float): The narrow ratio for channels. Fixed to 1.
+        sft_half (bool): Whether to apply SFT on half of the input channels. Fixed to True.
+    """
+    def __init__(
+        self,
+        state_dict,
+    ):
+        super(GFPGANv1Clean, self).__init__()
+        # Architecture hyper-parameters, hard-coded for this loader.
+        out_size = 512
+        num_style_feat = 512
+        channel_multiplier = 2
+        decoder_load_path = None
+        fix_decoder = False
+        num_mlp = 8
+        input_is_latent = True
+        different_w = True
+        narrow = 1
+        sft_half = True
+        # Descriptive attributes; they are only assigned here and never read in
+        # this class (presumably consumed by the surrounding model loader).
+        self.model_arch = "GFPGAN"
+        self.sub_type = "Face SR"
+        self.scale = 8
+        self.in_nc = 3
+        self.out_nc = 3
+        self.state = state_dict
+        self.supports_fp16 = False
+        self.supports_bf16 = True
+        self.min_size_restriction = 512
+        self.input_is_latent = input_is_latent
+        self.different_w = different_w
+        self.num_style_feat = num_style_feat
+        unet_narrow = narrow * 0.5  # by default, use a half of input channels
+        # Channel count per spatial resolution, keyed by the resolution as a string.
+        channels = {
+            "4": int(512 * unet_narrow),
+            "8": int(512 * unet_narrow),
+            "16": int(512 * unet_narrow),
+            "32": int(512 * unet_narrow),
+            "64": int(256 * channel_multiplier * unet_narrow),
+            "128": int(128 * channel_multiplier * unet_narrow),
+            "256": int(64 * channel_multiplier * unet_narrow),
+            "512": int(32 * channel_multiplier * unet_narrow),
+            "1024": int(16 * channel_multiplier * unet_narrow),
+        }
+        self.log_size = int(math.log(out_size, 2))
+        # out_size rounded down to a power of two; selects the first channel entry.
+        first_out_size = 2 ** (int(math.log(out_size, 2)))
+        self.conv_body_first = nn.Conv2d(3, channels[f"{first_out_size}"], 1)
+        # downsample
+        in_channels = channels[f"{first_out_size}"]
+        self.conv_body_down = nn.ModuleList()
+        # One halving ResBlock per step from resolution 2**log_size down to 4.
+        for i in range(self.log_size, 2, -1):
+            out_channels = channels[f"{2**(i - 1)}"]
+            self.conv_body_down.append(ResBlock(in_channels, out_channels, mode="down"))
+            in_channels = out_channels
+        self.final_conv = nn.Conv2d(in_channels, channels["4"], 3, 1, 1)
+        # upsample
+        in_channels = channels["4"]
+        self.conv_body_up = nn.ModuleList()
+        # One doubling ResBlock per step from resolution 8 up to 2**log_size.
+        for i in range(3, self.log_size + 1):
+            out_channels = channels[f"{2**i}"]
+            self.conv_body_up.append(ResBlock(in_channels, out_channels, mode="up"))
+            in_channels = out_channels
+        # to RGB
+        self.toRGB = nn.ModuleList()
+        for i in range(3, self.log_size + 1):
+            self.toRGB.append(nn.Conv2d(channels[f"{2**i}"], 3, 1))
+        # With different_w the linear layer emits (2 * log_size - 2) style
+        # vectors of num_style_feat each; otherwise a single one.
+        if different_w:
+            linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat
+        else:
+            linear_out_channel = num_style_feat
+        # Input size channels["4"] * 4 * 4 means the flattened encoder output
+        # must be a 4x4 map when it reaches this layer.
+        self.final_linear = nn.Linear(channels["4"] * 4 * 4, linear_out_channel)
+        # the decoder: stylegan2 generator with SFT modulations
+        self.stylegan_decoder = StyleGAN2GeneratorCSFT(
+            out_size=out_size,
+            num_style_feat=num_style_feat,
+            num_mlp=num_mlp,
+            channel_multiplier=channel_multiplier,
+            narrow=narrow,
+            sft_half=sft_half,
+        )
+        # load pre-trained stylegan2 model if necessary
+        # (decoder_load_path is None above, so this branch is skipped)
+        if decoder_load_path:
+            self.stylegan_decoder.load_state_dict(
+                torch.load(
+                    decoder_load_path, map_location=lambda storage, loc: storage
+                )["params_ema"]
+            )
+        # fix decoder without updating params
+        # (fix_decoder is False above, so this branch is skipped)
+        if fix_decoder:
+            for _, param in self.stylegan_decoder.named_parameters():
+                param.requires_grad = False
+        # for SFT modulations (scale and shift)
+        self.condition_scale = nn.ModuleList()
+        self.condition_shift = nn.ModuleList()
+        for i in range(3, self.log_size + 1):
+            out_channels = channels[f"{2**i}"]
+            if sft_half:
+                sft_out_channels = out_channels
+            else:
+                sft_out_channels = out_channels * 2
+            self.condition_scale.append(
+                nn.Sequential(
+                    nn.Conv2d(out_channels, out_channels, 3, 1, 1),
+                    nn.LeakyReLU(0.2, True),
+                    nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1),
+                )
+            )
+            self.condition_shift.append(
+                nn.Sequential(
+                    nn.Conv2d(out_channels, out_channels, 3, 1, 1),
+                    nn.LeakyReLU(0.2, True),
+                    nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1),
+                )
+            )
+        # Load the checkpoint last, once every sub-module above exists.
+        self.load_state_dict(state_dict)
+    def forward(
+        self, x, return_latents=False, return_rgb=True, randomize_noise=True, **kwargs
+    ):
+        """Forward function for GFPGANv1Clean.
+        Args:
+            x (Tensor): Input images.
+            return_latents (bool): Whether to return style latents. Default: False.
+                It is forwarded to the decoder, but the decoder's second return
+                value is discarded here.
+            return_rgb (bool): Whether return intermediate rgb images. Default: True.
+            randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
+            **kwargs: Accepted and not used in this method.
+        Returns:
+            tuple: ``(image, out_rgbs)`` where ``image`` is the decoder output and
+            ``out_rgbs`` is the list of per-scale ``toRGB`` outputs (empty when
+            ``return_rgb`` is False).
+        """
+        conditions = []
+        unet_skips = []
+        out_rgbs = []
+        # encoder
+        feat = F.leaky_relu_(self.conv_body_first(x), negative_slope=0.2)
+        for i in range(self.log_size - 2):
+            feat = self.conv_body_down[i](feat)
+            # Prepend, so the deepest feature sits at index 0 and the decode
+            # loop below can consume the skips with the same index i.
+            unet_skips.insert(0, feat)
+        feat = F.leaky_relu_(self.final_conv(feat), negative_slope=0.2)
+        # style code
+        style_code = self.final_linear(feat.view(feat.size(0), -1))
+        if self.different_w:
+            # Split the flat vector into one num_style_feat-sized code per layer.
+            style_code = style_code.view(style_code.size(0), -1, self.num_style_feat)
+        # decode
+        for i in range(self.log_size - 2):
+            # add unet skip
+            feat = feat + unet_skips[i]
+            # ResUpLayer
+            feat = self.conv_body_up[i](feat)
+            # generate scale and shift for SFT layers
+            # (conditions ends up interleaved: scale_0, shift_0, scale_1, shift_1, ...)
+            scale = self.condition_scale[i](feat)
+            conditions.append(scale.clone())
+            shift = self.condition_shift[i](feat)
+            conditions.append(shift.clone())
+            # generate rgb images
+            if return_rgb:
+                out_rgbs.append(self.toRGB[i](feat))
+        # decoder
+        image, _ = self.stylegan_decoder(
+            [style_code],
+            conditions,
+            return_latents=return_latents,
+            input_is_latent=self.input_is_latent,
+            randomize_noise=randomize_noise,
+        )
+        return image, out_rgbs

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/restoreformer_arch.py ADDED Viewed

	@@ -0,0 +1,776 @@

+# pylint: skip-file
+# type: ignore
+"""Modified from https://github.com/wzhouxiff/RestoreFormer
+"""
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class VectorQuantizer(nn.Module):
+    """
+    see https://github.com/MishaLaskin/vqvae/blob/d761a999e2267766400dc646d82d3ac3657771d4/models/quantizer.py
+    ____________________________________________
+    Discretization bottleneck part of the VQ-VAE.
+    Inputs:
+    - n_e : number of embeddings
+    - e_dim : dimension of embedding
+    - beta : commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2
+    _____________________________________________
+    """
+    def __init__(self, n_e, e_dim, beta):
+        super(VectorQuantizer, self).__init__()
+        self.n_e = n_e
+        self.e_dim = e_dim
+        self.beta = beta
+        self.embedding = nn.Embedding(self.n_e, self.e_dim)
+        self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)
+    def forward(self, z):
+        """
+        Inputs the output of the encoder network z and maps it to a discrete
+        one-hot vector that is the index of the closest embedding vector e_j
+        z (continuous) -> z_q (discrete)
+        z.shape = (batch, channel, height, width)
+        quantization pipeline:
+            1. get encoder input (B,C,H,W)
+            2. flatten input to (B*H*W,C)
+        """
+        # reshape z -> (batch, height, width, channel) and flatten
+        z = z.permute(0, 2, 3, 1).contiguous()
+        z_flattened = z.view(-1, self.e_dim)
+        # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
+        d = (
+            torch.sum(z_flattened**2, dim=1, keepdim=True)
+            + torch.sum(self.embedding.weight**2, dim=1)
+            - 2 * torch.matmul(z_flattened, self.embedding.weight.t())
+        )
+        # could possible replace this here
+        # #\start...
+        # find closest encodings
+        min_value, min_encoding_indices = torch.min(d, dim=1)
+        min_encoding_indices = min_encoding_indices.unsqueeze(1)
+        min_encodings = torch.zeros(min_encoding_indices.shape[0], self.n_e).to(z)
+        min_encodings.scatter_(1, min_encoding_indices, 1)
+        # dtype min encodings: torch.float32
+        # min_encodings shape: torch.Size([2048, 512])
+        # min_encoding_indices.shape: torch.Size([2048, 1])
+        # get quantized latent vectors
+        z_q = torch.matmul(min_encodings, self.embedding.weight).view(z.shape)
+        # .........\end
+        # with:
+        # .........\start
+        # min_encoding_indices = torch.argmin(d, dim=1)
+        # z_q = self.embedding(min_encoding_indices)
+        # ......\end......... (TODO)
+        # compute loss for embedding
+        loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * torch.mean(
+            (z_q - z.detach()) ** 2
+        )
+        # preserve gradients
+        z_q = z + (z_q - z).detach()
+        # perplexity
+        e_mean = torch.mean(min_encodings, dim=0)
+        perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10)))
+        # reshape back to match original input shape
+        z_q = z_q.permute(0, 3, 1, 2).contiguous()
+        return z_q, loss, (perplexity, min_encodings, min_encoding_indices, d)
+    def get_codebook_entry(self, indices, shape):
+        # shape specifying (batch, height, width, channel)
+        # TODO: check for more easy handling with nn.Embedding
+        min_encodings = torch.zeros(indices.shape[0], self.n_e).to(indices)
+        min_encodings.scatter_(1, indices[:, None], 1)
+        # get quantized latent vectors
+        z_q = torch.matmul(min_encodings.float(), self.embedding.weight)
+        if shape is not None:
+            z_q = z_q.view(shape)
+            # reshape back to match original input shape
+            z_q = z_q.permute(0, 3, 1, 2).contiguous()
+        return z_q
+# pytorch_diffusion + derived encoder decoder
+def nonlinearity(x):
+    # swish
+    return x * torch.sigmoid(x)
+def Normalize(in_channels):
+    return torch.nn.GroupNorm(
+        num_groups=32, num_channels=in_channels, eps=1e-6, affine=True
+    )
+class Upsample(nn.Module):
+    def __init__(self, in_channels, with_conv):
+        super().__init__()
+        self.with_conv = with_conv
+        if self.with_conv:
+            self.conv = torch.nn.Conv2d(
+                in_channels, in_channels, kernel_size=3, stride=1, padding=1
+            )
+    def forward(self, x):
+        x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
+        if self.with_conv:
+            x = self.conv(x)
+        return x
+class Downsample(nn.Module):
+    def __init__(self, in_channels, with_conv):
+        super().__init__()
+        self.with_conv = with_conv
+        if self.with_conv:
+            # no asymmetric padding in torch conv, must do it ourselves
+            self.conv = torch.nn.Conv2d(
+                in_channels, in_channels, kernel_size=3, stride=2, padding=0
+            )
+    def forward(self, x):
+        if self.with_conv:
+            pad = (0, 1, 0, 1)
+            x = torch.nn.functional.pad(x, pad, mode="constant", value=0)
+            x = self.conv(x)
+        else:
+            x = torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2)
+        return x
+class ResnetBlock(nn.Module):
+    def __init__(
+        self,
+        *,
+        in_channels,
+        out_channels=None,
+        conv_shortcut=False,
+        dropout,
+        temb_channels=512
+    ):
+        super().__init__()
+        self.in_channels = in_channels
+        out_channels = in_channels if out_channels is None else out_channels
+        self.out_channels = out_channels
+        self.use_conv_shortcut = conv_shortcut
+        self.norm1 = Normalize(in_channels)
+        self.conv1 = torch.nn.Conv2d(
+            in_channels, out_channels, kernel_size=3, stride=1, padding=1
+        )
+        if temb_channels > 0:
+            self.temb_proj = torch.nn.Linear(temb_channels, out_channels)
+        self.norm2 = Normalize(out_channels)
+        self.dropout = torch.nn.Dropout(dropout)
+        self.conv2 = torch.nn.Conv2d(
+            out_channels, out_channels, kernel_size=3, stride=1, padding=1
+        )
+        if self.in_channels != self.out_channels:
+            if self.use_conv_shortcut:
+                self.conv_shortcut = torch.nn.Conv2d(
+                    in_channels, out_channels, kernel_size=3, stride=1, padding=1
+                )
+            else:
+                self.nin_shortcut = torch.nn.Conv2d(
+                    in_channels, out_channels, kernel_size=1, stride=1, padding=0
+                )
+    def forward(self, x, temb):
+        h = x
+        h = self.norm1(h)
+        h = nonlinearity(h)
+        h = self.conv1(h)
+        if temb is not None:
+            h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None]
+        h = self.norm2(h)
+        h = nonlinearity(h)
+        h = self.dropout(h)
+        h = self.conv2(h)
+        if self.in_channels != self.out_channels:
+            if self.use_conv_shortcut:
+                x = self.conv_shortcut(x)
+            else:
+                x = self.nin_shortcut(x)
+        return x + h
+class MultiHeadAttnBlock(nn.Module):
+    def __init__(self, in_channels, head_size=1):
+        super().__init__()
+        self.in_channels = in_channels
+        self.head_size = head_size
+        self.att_size = in_channels // head_size
+        assert (
+            in_channels % head_size == 0
+        ), "The size of head should be divided by the number of channels."
+        self.norm1 = Normalize(in_channels)
+        self.norm2 = Normalize(in_channels)
+        self.q = torch.nn.Conv2d(
+            in_channels, in_channels, kernel_size=1, stride=1, padding=0
+        )
+        self.k = torch.nn.Conv2d(
+            in_channels, in_channels, kernel_size=1, stride=1, padding=0
+        )
+        self.v = torch.nn.Conv2d(
+            in_channels, in_channels, kernel_size=1, stride=1, padding=0
+        )
+        self.proj_out = torch.nn.Conv2d(
+            in_channels, in_channels, kernel_size=1, stride=1, padding=0
+        )
+        self.num = 0
+    def forward(self, x, y=None):
+        h_ = x
+        h_ = self.norm1(h_)
+        if y is None:
+            y = h_
+        else:
+            y = self.norm2(y)
+        q = self.q(y)
+        k = self.k(h_)
+        v = self.v(h_)
+        # compute attention
+        b, c, h, w = q.shape
+        q = q.reshape(b, self.head_size, self.att_size, h * w)
+        q = q.permute(0, 3, 1, 2)  # b, hw, head, att
+        k = k.reshape(b, self.head_size, self.att_size, h * w)
+        k = k.permute(0, 3, 1, 2)
+        v = v.reshape(b, self.head_size, self.att_size, h * w)
+        v = v.permute(0, 3, 1, 2)
+        q = q.transpose(1, 2)
+        v = v.transpose(1, 2)
+        k = k.transpose(1, 2).transpose(2, 3)
+        scale = int(self.att_size) ** (-0.5)
+        q.mul_(scale)
+        w_ = torch.matmul(q, k)
+        w_ = F.softmax(w_, dim=3)
+        w_ = w_.matmul(v)
+        w_ = w_.transpose(1, 2).contiguous()  # [b, h*w, head, att]
+        w_ = w_.view(b, h, w, -1)
+        w_ = w_.permute(0, 3, 1, 2)
+        w_ = self.proj_out(w_)
+        return x + w_
+class MultiHeadEncoder(nn.Module):
+    """Convolutional encoder that also returns its intermediate feature maps.
+    Each resolution level runs ``num_res_blocks`` ResnetBlocks, each followed by
+    a MultiHeadAttnBlock when the level's resolution is in ``attn_resolutions``,
+    and every level except the last ends with a Downsample.
+    Args:
+        ch (int): Base channel count; level i uses ``ch * ch_mult[i]`` channels.
+        out_ch (int): Accepted but not referenced in this class.
+        ch_mult (tuple): Channel multiplier per resolution level.
+        num_res_blocks (int): ResnetBlocks per level.
+        attn_resolutions (tuple): Resolutions at which attention blocks are added.
+        dropout (float): Dropout probability passed to every ResnetBlock.
+        resamp_with_conv (bool): Passed to Downsample as ``with_conv``.
+        in_channels (int): Channel count of the input.
+        resolution (int): Nominal input resolution, used only to decide where
+            attention blocks go.
+        z_channels (int): Channel count of the output (doubled when ``double_z``).
+        double_z (bool): Emit ``2 * z_channels`` output channels.
+        enable_mid (bool): Build and run the middle ResnetBlock/attention stack.
+        head_size (int): Passed to every MultiHeadAttnBlock.
+        **ignore_kwargs: Extra keyword arguments are accepted and ignored.
+    """
+    def __init__(
+        self,
+        ch,
+        out_ch,
+        ch_mult=(1, 2, 4, 8),
+        num_res_blocks=2,
+        attn_resolutions=(16,),
+        dropout=0.0,
+        resamp_with_conv=True,
+        in_channels=3,
+        resolution=512,
+        z_channels=256,
+        double_z=True,
+        enable_mid=True,
+        head_size=1,
+        **ignore_kwargs
+    ):
+        super().__init__()
+        self.ch = ch
+        # 0 means the ResnetBlocks are built without an embedding projection.
+        self.temb_ch = 0
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.resolution = resolution
+        self.in_channels = in_channels
+        self.enable_mid = enable_mid
+        # downsampling
+        self.conv_in = torch.nn.Conv2d(
+            in_channels, self.ch, kernel_size=3, stride=1, padding=1
+        )
+        curr_res = resolution
+        # Input multiplier of level i is the output multiplier of level i - 1
+        # (1 for the first level, which follows conv_in).
+        in_ch_mult = (1,) + tuple(ch_mult)
+        self.down = nn.ModuleList()
+        for i_level in range(self.num_resolutions):
+            block = nn.ModuleList()
+            attn = nn.ModuleList()
+            block_in = ch * in_ch_mult[i_level]
+            block_out = ch * ch_mult[i_level]
+            for i_block in range(self.num_res_blocks):
+                block.append(
+                    ResnetBlock(
+                        in_channels=block_in,
+                        out_channels=block_out,
+                        temb_channels=self.temb_ch,
+                        dropout=dropout,
+                    )
+                )
+                block_in = block_out
+                # One attention block per ResnetBlock at attention resolutions,
+                # so attn[i_block] pairs with block[i_block] in forward().
+                if curr_res in attn_resolutions:
+                    attn.append(MultiHeadAttnBlock(block_in, head_size))
+            # Bare nn.Module used as a container for this level's sub-modules.
+            down = nn.Module()
+            down.block = block
+            down.attn = attn
+            if i_level != self.num_resolutions - 1:
+                down.downsample = Downsample(block_in, resamp_with_conv)
+                curr_res = curr_res // 2
+            self.down.append(down)
+        # middle
+        if self.enable_mid:
+            self.mid = nn.Module()
+            self.mid.block_1 = ResnetBlock(
+                in_channels=block_in,
+                out_channels=block_in,
+                temb_channels=self.temb_ch,
+                dropout=dropout,
+            )
+            self.mid.attn_1 = MultiHeadAttnBlock(block_in, head_size)
+            self.mid.block_2 = ResnetBlock(
+                in_channels=block_in,
+                out_channels=block_in,
+                temb_channels=self.temb_ch,
+                dropout=dropout,
+            )
+        # end
+        self.norm_out = Normalize(block_in)
+        self.conv_out = torch.nn.Conv2d(
+            block_in,
+            2 * z_channels if double_z else z_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+        )
+    def forward(self, x):
+        """Encode ``x`` and return a dict of feature maps.
+        Returns:
+            dict: ``"in"`` (output of ``conv_in``), ``"block_<i>"`` for every level
+            except the last (the level's features before downsampling), ``"out"``
+            (final encoding) and, when ``enable_mid`` is set,
+            ``"block_<last>_atten"`` and ``"mid_atten"``.
+        """
+        hs = {}
+        # timestep embedding
+        temb = None
+        # downsampling
+        h = self.conv_in(x)
+        hs["in"] = h
+        for i_level in range(self.num_resolutions):
+            for i_block in range(self.num_res_blocks):
+                h = self.down[i_level].block[i_block](h, temb)
+                if len(self.down[i_level].attn) > 0:
+                    h = self.down[i_level].attn[i_block](h)
+            if i_level != self.num_resolutions - 1:
+                # hs.append(h)
+                hs["block_" + str(i_level)] = h
+                h = self.down[i_level].downsample(h)
+        # middle
+        # h = hs[-1]
+        if self.enable_mid:
+            h = self.mid.block_1(h, temb)
+            # i_level still holds the last loop value here, i.e. the index of
+            # the deepest level.
+            hs["block_" + str(i_level) + "_atten"] = h
+            h = self.mid.attn_1(h)
+            h = self.mid.block_2(h, temb)
+            hs["mid_atten"] = h
+        # end
+        h = self.norm_out(h)
+        h = nonlinearity(h)
+        h = self.conv_out(h)
+        # hs.append(h)
+        hs["out"] = h
+        return hs
+class MultiHeadDecoder(nn.Module):
+    """Convolutional decoder from a latent map back to ``out_ch`` channels.
+    Starting at the deepest level, each level runs ``num_res_blocks + 1``
+    ResnetBlocks (each followed by self-attention at attention resolutions)
+    and every level except level 0 ends with an Upsample.
+    Args:
+        ch (int): Base channel count; level i uses ``ch * ch_mult[i]`` channels.
+        out_ch (int): Channel count of the output.
+        ch_mult (tuple): Channel multiplier per resolution level.
+        num_res_blocks (int): One less than the ResnetBlocks built per level.
+        attn_resolutions (tuple): Resolutions at which attention blocks are added.
+        dropout (float): Dropout probability passed to every ResnetBlock.
+        resamp_with_conv (bool): Passed to Upsample as ``with_conv``.
+        in_channels (int): Only stored on the instance.
+        resolution (int): Nominal output resolution; the latent resolution is
+            derived from it.
+        z_channels (int): Channel count of the latent input.
+        give_pre_end (bool): Return the features before the final norm/conv.
+        enable_mid (bool): Build and run the middle ResnetBlock/attention stack.
+        head_size (int): Passed to every MultiHeadAttnBlock.
+        **ignorekwargs: Extra keyword arguments are accepted and ignored.
+    """
+    def __init__(
+        self,
+        ch,
+        out_ch,
+        ch_mult=(1, 2, 4, 8),
+        num_res_blocks=2,
+        attn_resolutions=(16,),
+        dropout=0.0,
+        resamp_with_conv=True,
+        in_channels=3,
+        resolution=512,
+        z_channels=256,
+        give_pre_end=False,
+        enable_mid=True,
+        head_size=1,
+        **ignorekwargs
+    ):
+        super().__init__()
+        self.ch = ch
+        # 0 means the ResnetBlocks are built without an embedding projection.
+        self.temb_ch = 0
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.resolution = resolution
+        self.in_channels = in_channels
+        self.give_pre_end = give_pre_end
+        self.enable_mid = enable_mid
+        # compute in_ch_mult, block_in and curr_res at lowest res
+        block_in = ch * ch_mult[self.num_resolutions - 1]
+        curr_res = resolution // 2 ** (self.num_resolutions - 1)
+        self.z_shape = (1, z_channels, curr_res, curr_res)
+        # NOTE: constructing the module writes this line to stdout.
+        print(
+            "Working with z of shape {} = {} dimensions.".format(
+                self.z_shape, np.prod(self.z_shape)
+            )
+        )
+        # z to block_in
+        self.conv_in = torch.nn.Conv2d(
+            z_channels, block_in, kernel_size=3, stride=1, padding=1
+        )
+        # middle
+        if self.enable_mid:
+            self.mid = nn.Module()
+            self.mid.block_1 = ResnetBlock(
+                in_channels=block_in,
+                out_channels=block_in,
+                temb_channels=self.temb_ch,
+                dropout=dropout,
+            )
+            self.mid.attn_1 = MultiHeadAttnBlock(block_in, head_size)
+            self.mid.block_2 = ResnetBlock(
+                in_channels=block_in,
+                out_channels=block_in,
+                temb_channels=self.temb_ch,
+                dropout=dropout,
+            )
+        # upsampling
+        self.up = nn.ModuleList()
+        # Levels are built deepest-first, following the order forward() runs them.
+        for i_level in reversed(range(self.num_resolutions)):
+            block = nn.ModuleList()
+            attn = nn.ModuleList()
+            block_out = ch * ch_mult[i_level]
+            for i_block in range(self.num_res_blocks + 1):
+                block.append(
+                    ResnetBlock(
+                        in_channels=block_in,
+                        out_channels=block_out,
+                        temb_channels=self.temb_ch,
+                        dropout=dropout,
+                    )
+                )
+                block_in = block_out
+                if curr_res in attn_resolutions:
+                    attn.append(MultiHeadAttnBlock(block_in, head_size))
+            # Bare nn.Module used as a container for this level's sub-modules.
+            up = nn.Module()
+            up.block = block
+            up.attn = attn
+            if i_level != 0:
+                up.upsample = Upsample(block_in, resamp_with_conv)
+                curr_res = curr_res * 2
+            self.up.insert(0, up)  # prepend to get consistent order
+        # end
+        self.norm_out = Normalize(block_in)
+        self.conv_out = torch.nn.Conv2d(
+            block_in, out_ch, kernel_size=3, stride=1, padding=1
+        )
+    def forward(self, z):
+        """Decode the latent ``z``.
+        Also records ``z.shape`` on ``self.last_z_shape``.
+        Returns:
+            Tensor: The output of ``conv_out``, or the features before the final
+            normalisation when ``give_pre_end`` is set.
+        """
+        # assert z.shape[1:] == self.z_shape[1:]
+        self.last_z_shape = z.shape
+        # timestep embedding
+        temb = None
+        # z to block_in
+        h = self.conv_in(z)
+        # middle
+        if self.enable_mid:
+            h = self.mid.block_1(h, temb)
+            h = self.mid.attn_1(h)
+            h = self.mid.block_2(h, temb)
+        # upsampling
+        for i_level in reversed(range(self.num_resolutions)):
+            for i_block in range(self.num_res_blocks + 1):
+                h = self.up[i_level].block[i_block](h, temb)
+                if len(self.up[i_level].attn) > 0:
+                    h = self.up[i_level].attn[i_block](h)
+            if i_level != 0:
+                h = self.up[i_level].upsample(h)
+        # end
+        if self.give_pre_end:
+            return h
+        h = self.norm_out(h)
+        h = nonlinearity(h)
+        h = self.conv_out(h)
+        return h
+class MultiHeadDecoderTransformer(nn.Module):
+    """Decoder whose attention blocks take their queries from encoder features.
+    The layer layout matches MultiHeadDecoder; the difference is in
+    ``forward``, which passes entries of the encoder feature dict ``hs`` as the
+    second argument of every attention block.
+    Args:
+        ch (int): Base channel count; level i uses ``ch * ch_mult[i]`` channels.
+        out_ch (int): Channel count of the output.
+        ch_mult (tuple): Channel multiplier per resolution level.
+        num_res_blocks (int): One less than the ResnetBlocks built per level.
+        attn_resolutions (tuple): Resolutions at which attention blocks are added.
+        dropout (float): Dropout probability passed to every ResnetBlock.
+        resamp_with_conv (bool): Passed to Upsample as ``with_conv``.
+        in_channels (int): Only stored on the instance.
+        resolution (int): Nominal output resolution; the latent resolution is
+            derived from it.
+        z_channels (int): Channel count of the latent input.
+        give_pre_end (bool): Return the features before the final norm/conv.
+        enable_mid (bool): Build and run the middle ResnetBlock/attention stack.
+        head_size (int): Passed to every MultiHeadAttnBlock.
+        **ignorekwargs: Extra keyword arguments are accepted and ignored.
+    """
+    def __init__(
+        self,
+        ch,
+        out_ch,
+        ch_mult=(1, 2, 4, 8),
+        num_res_blocks=2,
+        attn_resolutions=(16,),
+        dropout=0.0,
+        resamp_with_conv=True,
+        in_channels=3,
+        resolution=512,
+        z_channels=256,
+        give_pre_end=False,
+        enable_mid=True,
+        head_size=1,
+        **ignorekwargs
+    ):
+        super().__init__()
+        self.ch = ch
+        # 0 means the ResnetBlocks are built without an embedding projection.
+        self.temb_ch = 0
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.resolution = resolution
+        self.in_channels = in_channels
+        self.give_pre_end = give_pre_end
+        self.enable_mid = enable_mid
+        # compute in_ch_mult, block_in and curr_res at lowest res
+        block_in = ch * ch_mult[self.num_resolutions - 1]
+        curr_res = resolution // 2 ** (self.num_resolutions - 1)
+        self.z_shape = (1, z_channels, curr_res, curr_res)
+        # NOTE: constructing the module writes this line to stdout.
+        print(
+            "Working with z of shape {} = {} dimensions.".format(
+                self.z_shape, np.prod(self.z_shape)
+            )
+        )
+        # z to block_in
+        self.conv_in = torch.nn.Conv2d(
+            z_channels, block_in, kernel_size=3, stride=1, padding=1
+        )
+        # middle
+        if self.enable_mid:
+            self.mid = nn.Module()
+            self.mid.block_1 = ResnetBlock(
+                in_channels=block_in,
+                out_channels=block_in,
+                temb_channels=self.temb_ch,
+                dropout=dropout,
+            )
+            self.mid.attn_1 = MultiHeadAttnBlock(block_in, head_size)
+            self.mid.block_2 = ResnetBlock(
+                in_channels=block_in,
+                out_channels=block_in,
+                temb_channels=self.temb_ch,
+                dropout=dropout,
+            )
+        # upsampling
+        self.up = nn.ModuleList()
+        # Levels are built deepest-first, following the order forward() runs them.
+        for i_level in reversed(range(self.num_resolutions)):
+            block = nn.ModuleList()
+            attn = nn.ModuleList()
+            block_out = ch * ch_mult[i_level]
+            for i_block in range(self.num_res_blocks + 1):
+                block.append(
+                    ResnetBlock(
+                        in_channels=block_in,
+                        out_channels=block_out,
+                        temb_channels=self.temb_ch,
+                        dropout=dropout,
+                    )
+                )
+                block_in = block_out
+                if curr_res in attn_resolutions:
+                    attn.append(MultiHeadAttnBlock(block_in, head_size))
+            # Bare nn.Module used as a container for this level's sub-modules.
+            up = nn.Module()
+            up.block = block
+            up.attn = attn
+            if i_level != 0:
+                up.upsample = Upsample(block_in, resamp_with_conv)
+                curr_res = curr_res * 2
+            self.up.insert(0, up)  # prepend to get consistent order
+        # end
+        self.norm_out = Normalize(block_in)
+        self.conv_out = torch.nn.Conv2d(
+            block_in, out_ch, kernel_size=3, stride=1, padding=1
+        )
+    def forward(self, z, hs):
+        """Decode the latent ``z`` using the encoder features in ``hs``.
+        Args:
+            z (Tensor): Latent input of ``conv_in``.
+            hs (dict): Feature maps keyed by name. ``"mid_atten"`` is read when
+                ``enable_mid`` is set, and ``"block_<i>_atten"`` is read for
+                every level i that has attention blocks.
+        Returns:
+            Tensor: The output of ``conv_out``, or the features before the final
+            normalisation when ``give_pre_end`` is set.
+        """
+        # assert z.shape[1:] == self.z_shape[1:]
+        # self.last_z_shape = z.shape
+        # timestep embedding
+        temb = None
+        # z to block_in
+        h = self.conv_in(z)
+        # middle
+        if self.enable_mid:
+            h = self.mid.block_1(h, temb)
+            h = self.mid.attn_1(h, hs["mid_atten"])
+            h = self.mid.block_2(h, temb)
+        # upsampling
+        for i_level in reversed(range(self.num_resolutions)):
+            for i_block in range(self.num_res_blocks + 1):
+                h = self.up[i_level].block[i_block](h, temb)
+                if len(self.up[i_level].attn) > 0:
+                    # The same encoder feature is passed to every attention
+                    # block of this level.
+                    h = self.up[i_level].attn[i_block](
+                        h, hs["block_" + str(i_level) + "_atten"]
+                    )
+                    # hfeature = h.clone()
+            if i_level != 0:
+                h = self.up[i_level].upsample(h)
+        # end
+        if self.give_pre_end:
+            return h
+        h = self.norm_out(h)
+        h = nonlinearity(h)
+        h = self.conv_out(h)
+        return h
+class RestoreFormer(nn.Module):
+    """RestoreFormer: MultiHeadEncoder -> VectorQuantizer -> MultiHeadDecoderTransformer.
+    All hyper-parameters are assigned as local constants in ``__init__``; the
+    constructor only takes the checkpoint to load.
+    Args:
+        state_dict (dict): Parameters handed to ``load_state_dict`` after all
+            sub-modules have been created. Also kept on ``self.state``.
+    """
+    def __init__(
+        self,
+        state_dict,
+    ):
+        super(RestoreFormer, self).__init__()
+        # Architecture hyper-parameters, hard-coded for this loader.
+        n_embed = 1024
+        embed_dim = 256
+        ch = 64
+        out_ch = 3
+        ch_mult = (1, 2, 2, 4, 4, 8)
+        num_res_blocks = 2
+        attn_resolutions = (16,)
+        dropout = 0.0
+        in_channels = 3
+        resolution = 512
+        z_channels = 256
+        double_z = False
+        enable_mid = True
+        fix_decoder = False
+        fix_codebook = True
+        fix_encoder = False
+        head_size = 8
+        # Descriptive attributes; they are only assigned here and never read in
+        # this class (presumably consumed by the surrounding model loader).
+        self.model_arch = "RestoreFormer"
+        self.sub_type = "Face SR"
+        self.scale = 8
+        self.in_nc = 3
+        self.out_nc = out_ch
+        self.state = state_dict
+        self.supports_fp16 = False
+        self.supports_bf16 = True
+        self.min_size_restriction = 16
+        self.encoder = MultiHeadEncoder(
+            ch=ch,
+            out_ch=out_ch,
+            ch_mult=ch_mult,
+            num_res_blocks=num_res_blocks,
+            attn_resolutions=attn_resolutions,
+            dropout=dropout,
+            in_channels=in_channels,
+            resolution=resolution,
+            z_channels=z_channels,
+            double_z=double_z,
+            enable_mid=enable_mid,
+            head_size=head_size,
+        )
+        self.decoder = MultiHeadDecoderTransformer(
+            ch=ch,
+            out_ch=out_ch,
+            ch_mult=ch_mult,
+            num_res_blocks=num_res_blocks,
+            attn_resolutions=attn_resolutions,
+            dropout=dropout,
+            in_channels=in_channels,
+            resolution=resolution,
+            z_channels=z_channels,
+            enable_mid=enable_mid,
+            head_size=head_size,
+        )
+        self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25)
+        # 1x1 convs mapping between encoder/decoder channels and codebook size.
+        self.quant_conv = torch.nn.Conv2d(z_channels, embed_dim, 1)
+        self.post_quant_conv = torch.nn.Conv2d(embed_dim, z_channels, 1)
+        # With the flags above only the ``fix_codebook`` branch runs: the
+        # quantizer parameters get requires_grad = False.
+        if fix_decoder:
+            for _, param in self.decoder.named_parameters():
+                param.requires_grad = False
+            for _, param in self.post_quant_conv.named_parameters():
+                param.requires_grad = False
+            for _, param in self.quantize.named_parameters():
+                param.requires_grad = False
+        elif fix_codebook:
+            for _, param in self.quantize.named_parameters():
+                param.requires_grad = False
+        if fix_encoder:
+            for _, param in self.encoder.named_parameters():
+                param.requires_grad = False
+        # Load the checkpoint last, once every sub-module above exists.
+        self.load_state_dict(state_dict)
+    def encode(self, x):
+        """Encode and quantize ``x``.
+        Returns:
+            tuple: ``(quant, emb_loss, info, hs)``: the three values returned by
+            the quantizer followed by the encoder's feature dict.
+        """
+        hs = self.encoder(x)
+        h = self.quant_conv(hs["out"])
+        quant, emb_loss, info = self.quantize(h)
+        return quant, emb_loss, info, hs
+    def decode(self, quant, hs):
+        """Decode ``quant`` with the encoder features ``hs`` as attention input."""
+        quant = self.post_quant_conv(quant)
+        dec = self.decoder(quant, hs)
+        return dec
+    def forward(self, input, **kwargs):
+        """Restore ``input``; ``**kwargs`` are accepted and not used.
+        Returns:
+            tuple: ``(dec, None)``. The quantizer loss and info returned by
+            ``encode`` are dropped.
+        """
+        quant, diff, info, hs = self.encode(input)
+        dec = self.decode(quant, hs)
+        return dec, None

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/stylegan2_arch.py ADDED Viewed

	@@ -0,0 +1,865 @@

+# pylint: skip-file
+# type: ignore
+import math
+import random
+import torch
+from torch import nn
+from torch.nn import functional as F
+from .fused_act import FusedLeakyReLU, fused_leaky_relu
+from .upfirdn2d import upfirdn2d
+class NormStyleCode(nn.Module):
+    def forward(self, x):
+        """Normalize the style codes.
+        Args:
+            x (Tensor): Style codes with shape (b, c).
+        Returns:
+            Tensor: Normalized tensor.
+        """
+        return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8)
+def make_resample_kernel(k):
+    """Make resampling kernel for UpFirDn.
+    Args:
+        k (list[int]): A list indicating the 1D resample kernel magnitude.
+    Returns:
+        Tensor: 2D resampled kernel.
+    """
+    k = torch.tensor(k, dtype=torch.float32)
+    if k.ndim == 1:
+        k = k[None, :] * k[:, None]  # to 2D kernel, outer product
+    # normalize
+    k /= k.sum()
+    return k
+class UpFirDnUpsample(nn.Module):
+    """Upsample, FIR filter, and downsample (upsampole version).
+    References:
+    1. https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.upfirdn.html  # noqa: E501
+    2. http://www.ece.northwestern.edu/local-apps/matlabhelp/toolbox/signal/upfirdn.html  # noqa: E501
+    Args:
+        resample_kernel (list[int]): A list indicating the 1D resample kernel
+            magnitude.
+        factor (int): Upsampling scale factor. Default: 2.
+    """
+    def __init__(self, resample_kernel, factor=2):
+        super(UpFirDnUpsample, self).__init__()
+        self.kernel = make_resample_kernel(resample_kernel) * (factor**2)
+        self.factor = factor
+        pad = self.kernel.shape[0] - factor
+        self.pad = ((pad + 1) // 2 + factor - 1, pad // 2)
+    def forward(self, x):
+        out = upfirdn2d(x, self.kernel.type_as(x), up=self.factor, down=1, pad=self.pad)
+        return out
+    def __repr__(self):
+        return f"{self.__class__.__name__}(factor={self.factor})"
+class UpFirDnDownsample(nn.Module):
+    """Upsample, FIR filter, and downsample (downsampole version).
+    Args:
+        resample_kernel (list[int]): A list indicating the 1D resample kernel
+            magnitude.
+        factor (int): Downsampling scale factor. Default: 2.
+    """
+    def __init__(self, resample_kernel, factor=2):
+        super(UpFirDnDownsample, self).__init__()
+        self.kernel = make_resample_kernel(resample_kernel)
+        self.factor = factor
+        pad = self.kernel.shape[0] - factor
+        self.pad = ((pad + 1) // 2, pad // 2)
+    def forward(self, x):
+        out = upfirdn2d(x, self.kernel.type_as(x), up=1, down=self.factor, pad=self.pad)
+        return out
+    def __repr__(self):
+        return f"{self.__class__.__name__}(factor={self.factor})"
+class UpFirDnSmooth(nn.Module):
+    """Upsample, FIR filter, and downsample (smooth version).
+    Args:
+        resample_kernel (list[int]): A list indicating the 1D resample kernel
+            magnitude.
+        upsample_factor (int): Upsampling scale factor. Default: 1.
+        downsample_factor (int): Downsampling scale factor. Default: 1.
+        kernel_size (int): Kernel size: Default: 1.
+    """
+    def __init__(
+        self, resample_kernel, upsample_factor=1, downsample_factor=1, kernel_size=1
+    ):
+        super(UpFirDnSmooth, self).__init__()
+        self.upsample_factor = upsample_factor
+        self.downsample_factor = downsample_factor
+        self.kernel = make_resample_kernel(resample_kernel)
+        if upsample_factor > 1:
+            self.kernel = self.kernel * (upsample_factor**2)
+        if upsample_factor > 1:
+            pad = (self.kernel.shape[0] - upsample_factor) - (kernel_size - 1)
+            self.pad = ((pad + 1) // 2 + upsample_factor - 1, pad // 2 + 1)
+        elif downsample_factor > 1:
+            pad = (self.kernel.shape[0] - downsample_factor) + (kernel_size - 1)
+            self.pad = ((pad + 1) // 2, pad // 2)
+        else:
+            raise NotImplementedError
+    def forward(self, x):
+        out = upfirdn2d(x, self.kernel.type_as(x), up=1, down=1, pad=self.pad)
+        return out
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}(upsample_factor={self.upsample_factor}"
+            f", downsample_factor={self.downsample_factor})"
+        )
+class EqualLinear(nn.Module):
+    """Equalized Linear as StyleGAN2.
+    Args:
+        in_channels (int): Size of each sample.
+        out_channels (int): Size of each output sample.
+        bias (bool): If set to ``False``, the layer will not learn an additive
+            bias. Default: ``True``.
+        bias_init_val (float): Bias initialized value. Default: 0.
+        lr_mul (float): Learning rate multiplier. Default: 1.
+        activation (None | str): The activation after ``linear`` operation.
+            Supported: 'fused_lrelu', None. Default: None.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        bias=True,
+        bias_init_val=0,
+        lr_mul=1,
+        activation=None,
+    ):
+        super(EqualLinear, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.lr_mul = lr_mul
+        self.activation = activation
+        if self.activation not in ["fused_lrelu", None]:
+            raise ValueError(
+                f"Wrong activation value in EqualLinear: {activation}"
+                "Supported ones are: ['fused_lrelu', None]."
+            )
+        self.scale = (1 / math.sqrt(in_channels)) * lr_mul
+        self.weight = nn.Parameter(torch.randn(out_channels, in_channels).div_(lr_mul))
+        if bias:
+            self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val))
+        else:
+            self.register_parameter("bias", None)
+    def forward(self, x):
+        if self.bias is None:
+            bias = None
+        else:
+            bias = self.bias * self.lr_mul
+        if self.activation == "fused_lrelu":
+            out = F.linear(x, self.weight * self.scale)
+            out = fused_leaky_relu(out, bias)
+        else:
+            out = F.linear(x, self.weight * self.scale, bias=bias)
+        return out
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}(in_channels={self.in_channels}, "
+            f"out_channels={self.out_channels}, bias={self.bias is not None})"
+        )
+class ModulatedConv2d(nn.Module):
+    """Modulated Conv2d used in StyleGAN2.
+    There is no bias in ModulatedConv2d.
+    A per-sample style vector scales the input channels of a shared weight,
+    and the whole batch is then convolved in a single grouped convolution.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Size of the convolving kernel.
+        num_style_feat (int): Channel number of style features.
+        demodulate (bool): Whether to demodulate in the conv layer.
+            Default: True.
+        sample_mode (str | None): Indicating 'upsample', 'downsample' or None.
+            Default: None.
+        resample_kernel (list[int]): A list indicating the 1D resample kernel
+            magnitude. Default: (1, 3, 3, 1).
+        eps (float): A value added to the denominator for numerical stability.
+            Default: 1e-8.
+    Raises:
+        ValueError: If ``sample_mode`` is not 'upsample', 'downsample' or None.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        num_style_feat,
+        demodulate=True,
+        sample_mode=None,
+        resample_kernel=(1, 3, 3, 1),
+        eps=1e-8,
+    ):
+        super(ModulatedConv2d, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.demodulate = demodulate
+        self.sample_mode = sample_mode
+        self.eps = eps
+        # self.smooth only exists when a resampling mode is selected
+        if self.sample_mode == "upsample":
+            self.smooth = UpFirDnSmooth(
+                resample_kernel,
+                upsample_factor=2,
+                downsample_factor=1,
+                kernel_size=kernel_size,
+            )
+        elif self.sample_mode == "downsample":
+            self.smooth = UpFirDnSmooth(
+                resample_kernel,
+                upsample_factor=1,
+                downsample_factor=2,
+                kernel_size=kernel_size,
+            )
+        elif self.sample_mode is None:
+            pass
+        else:
+            raise ValueError(
+                f"Wrong sample mode {self.sample_mode}, "
+                "supported ones are ['upsample', 'downsample', None]."
+            )
+        # constant factor applied to the weight in forward
+        self.scale = 1 / math.sqrt(in_channels * kernel_size**2)
+        # modulation inside each modulated conv
+        self.modulation = EqualLinear(
+            num_style_feat,
+            in_channels,
+            bias=True,
+            bias_init_val=1,
+            lr_mul=1,
+            activation=None,
+        )
+        # leading singleton dim broadcasts against the per-sample style
+        self.weight = nn.Parameter(
+            torch.randn(1, out_channels, in_channels, kernel_size, kernel_size)
+        )
+        # only used by the non-resampling path in forward
+        self.padding = kernel_size // 2
+    def forward(self, x, style):
+        """Forward function.
+        Args:
+            x (Tensor): Tensor with shape (b, c, h, w).
+            style (Tensor): Tensor with shape (b, num_style_feat).
+        Returns:
+            Tensor: Modulated tensor after convolution.
+        """
+        b, c, h, w = x.shape  # c = c_in
+        # weight modulation
+        style = self.modulation(style).view(b, 1, c, 1, 1)
+        # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1)
+        weight = self.scale * self.weight * style  # (b, c_out, c_in, k, k)
+        if self.demodulate:
+            # one normalizer per (sample, output channel)
+            demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps)
+            weight = weight * demod.view(b, self.out_channels, 1, 1, 1)
+        # fold the batch into the output-channel axis for the grouped conv
+        weight = weight.view(
+            b * self.out_channels, c, self.kernel_size, self.kernel_size
+        )
+        if self.sample_mode == "upsample":
+            # fold the batch into the channel axis: one group per sample
+            x = x.view(1, b * c, h, w)
+            weight = weight.view(
+                b, self.out_channels, c, self.kernel_size, self.kernel_size
+            )
+            # swap to input-channel-major layout before the transposed conv
+            weight = weight.transpose(1, 2).reshape(
+                b * c, self.out_channels, self.kernel_size, self.kernel_size
+            )
+            out = F.conv_transpose2d(x, weight, padding=0, stride=2, groups=b)
+            out = out.view(b, self.out_channels, *out.shape[2:4])
+            # filter after the strided transposed conv
+            out = self.smooth(out)
+        elif self.sample_mode == "downsample":
+            # filter before the strided conv
+            x = self.smooth(x)
+            x = x.view(1, b * c, *x.shape[2:4])
+            out = F.conv2d(x, weight, padding=0, stride=2, groups=b)
+            out = out.view(b, self.out_channels, *out.shape[2:4])
+        else:
+            x = x.view(1, b * c, h, w)
+            # weight: (b*c_out, c_in, k, k), groups=b
+            out = F.conv2d(x, weight, padding=self.padding, groups=b)
+            out = out.view(b, self.out_channels, *out.shape[2:4])
+        return out
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}(in_channels={self.in_channels}, "
+            f"out_channels={self.out_channels}, "
+            f"kernel_size={self.kernel_size}, "
+            f"demodulate={self.demodulate}, sample_mode={self.sample_mode})"
+        )
+class StyleConv(nn.Module):
+    """Style conv.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Size of the convolving kernel.
+        num_style_feat (int): Channel number of style features.
+        demodulate (bool): Whether demodulate in the conv layer. Default: True.
+        sample_mode (str | None): Indicating 'upsample', 'downsample' or None.
+            Default: None.
+        resample_kernel (list[int]): A list indicating the 1D resample kernel
+            magnitude. Default: (1, 3, 3, 1).
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        num_style_feat,
+        demodulate=True,
+        sample_mode=None,
+        resample_kernel=(1, 3, 3, 1),
+    ):
+        super(StyleConv, self).__init__()
+        self.modulated_conv = ModulatedConv2d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            num_style_feat,
+            demodulate=demodulate,
+            sample_mode=sample_mode,
+            resample_kernel=resample_kernel,
+        )
+        self.weight = nn.Parameter(torch.zeros(1))  # for noise injection
+        self.activate = FusedLeakyReLU(out_channels)
+    def forward(self, x, style, noise=None):
+        # modulate
+        out = self.modulated_conv(x, style)
+        # noise injection
+        if noise is None:
+            b, _, h, w = out.shape
+            noise = out.new_empty(b, 1, h, w).normal_()
+        out = out + self.weight * noise
+        # activation (with bias)
+        out = self.activate(out)
+        return out
+class ToRGB(nn.Module):
+    """To RGB from features.
+    Args:
+        in_channels (int): Channel number of input.
+        num_style_feat (int): Channel number of style features.
+        upsample (bool): Whether to upsample. Default: True.
+        resample_kernel (list[int]): A list indicating the 1D resample kernel
+            magnitude. Default: (1, 3, 3, 1).
+    """
+    def __init__(
+        self, in_channels, num_style_feat, upsample=True, resample_kernel=(1, 3, 3, 1)
+    ):
+        super(ToRGB, self).__init__()
+        if upsample:
+            self.upsample = UpFirDnUpsample(resample_kernel, factor=2)
+        else:
+            self.upsample = None
+        self.modulated_conv = ModulatedConv2d(
+            in_channels,
+            3,
+            kernel_size=1,
+            num_style_feat=num_style_feat,
+            demodulate=False,
+            sample_mode=None,
+        )
+        self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))
+    def forward(self, x, style, skip=None):
+        """Forward function.
+        Args:
+            x (Tensor): Feature tensor with shape (b, c, h, w).
+            style (Tensor): Tensor with shape (b, num_style_feat).
+            skip (Tensor): Base/skip tensor. Default: None.
+        Returns:
+            Tensor: RGB images.
+        """
+        out = self.modulated_conv(x, style)
+        out = out + self.bias
+        if skip is not None:
+            if self.upsample:
+                skip = self.upsample(skip)
+            out = out + skip
+        return out
+class ConstantInput(nn.Module):
+    """Constant input.
+    Args:
+        num_channel (int): Channel number of constant input.
+        size (int): Spatial size of constant input.
+    """
+    def __init__(self, num_channel, size):
+        super(ConstantInput, self).__init__()
+        self.weight = nn.Parameter(torch.randn(1, num_channel, size, size))
+    def forward(self, batch):
+        out = self.weight.repeat(batch, 1, 1, 1)
+        return out
+class StyleGAN2Generator(nn.Module):
+    """StyleGAN2 Generator.
+    Args:
+        out_size (int): The spatial size of outputs.
+        num_style_feat (int): Channel number of style features. Default: 512.
+        num_mlp (int): Layer number of MLP style layers. Default: 8.
+        channel_multiplier (int): Channel multiplier for large networks of
+            StyleGAN2. Default: 2.
+        resample_kernel (list[int]): A list indicating the 1D resample kernel
+            magnitude. A cross production will be applied to extent 1D resample
+            kernel to 2D resample kernel. Default: (1, 3, 3, 1).
+        lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01.
+        narrow (float): Narrow ratio for channels. Default: 1.0.
+    """
+    def __init__(
+        self,
+        out_size,
+        num_style_feat=512,
+        num_mlp=8,
+        channel_multiplier=2,
+        resample_kernel=(1, 3, 3, 1),
+        lr_mlp=0.01,
+        narrow=1,
+    ):
+        super(StyleGAN2Generator, self).__init__()
+        # Style MLP layers
+        self.num_style_feat = num_style_feat
+        style_mlp_layers = [NormStyleCode()]
+        for i in range(num_mlp):
+            style_mlp_layers.append(
+                EqualLinear(
+                    num_style_feat,
+                    num_style_feat,
+                    bias=True,
+                    bias_init_val=0,
+                    lr_mul=lr_mlp,
+                    activation="fused_lrelu",
+                )
+            )
+        self.style_mlp = nn.Sequential(*style_mlp_layers)
+        channels = {
+            "4": int(512 * narrow),
+            "8": int(512 * narrow),
+            "16": int(512 * narrow),
+            "32": int(512 * narrow),
+            "64": int(256 * channel_multiplier * narrow),
+            "128": int(128 * channel_multiplier * narrow),
+            "256": int(64 * channel_multiplier * narrow),
+            "512": int(32 * channel_multiplier * narrow),
+            "1024": int(16 * channel_multiplier * narrow),
+        }
+        self.channels = channels
+        self.constant_input = ConstantInput(channels["4"], size=4)
+        self.style_conv1 = StyleConv(
+            channels["4"],
+            channels["4"],
+            kernel_size=3,
+            num_style_feat=num_style_feat,
+            demodulate=True,
+            sample_mode=None,
+            resample_kernel=resample_kernel,
+        )
+        self.to_rgb1 = ToRGB(
+            channels["4"],
+            num_style_feat,
+            upsample=False,
+            resample_kernel=resample_kernel,
+        )
+        self.log_size = int(math.log(out_size, 2))
+        self.num_layers = (self.log_size - 2) * 2 + 1
+        self.num_latent = self.log_size * 2 - 2
+        self.style_convs = nn.ModuleList()
+        self.to_rgbs = nn.ModuleList()
+        self.noises = nn.Module()
+        in_channels = channels["4"]
+        # noise
+        for layer_idx in range(self.num_layers):
+            resolution = 2 ** ((layer_idx + 5) // 2)
+            shape = [1, 1, resolution, resolution]
+            self.noises.register_buffer(f"noise{layer_idx}", torch.randn(*shape))
+        # style convs and to_rgbs
+        for i in range(3, self.log_size + 1):
+            out_channels = channels[f"{2**i}"]
+            self.style_convs.append(
+                StyleConv(
+                    in_channels,
+                    out_channels,
+                    kernel_size=3,
+                    num_style_feat=num_style_feat,
+                    demodulate=True,
+                    sample_mode="upsample",
+                    resample_kernel=resample_kernel,
+                )
+            )
+            self.style_convs.append(
+                StyleConv(
+                    out_channels,
+                    out_channels,
+                    kernel_size=3,
+                    num_style_feat=num_style_feat,
+                    demodulate=True,
+                    sample_mode=None,
+                    resample_kernel=resample_kernel,
+                )
+            )
+            self.to_rgbs.append(
+                ToRGB(
+                    out_channels,
+                    num_style_feat,
+                    upsample=True,
+                    resample_kernel=resample_kernel,
+                )
+            )
+            in_channels = out_channels
+    def make_noise(self):
+        """Make noise for noise injection."""
+        device = self.constant_input.weight.device
+        noises = [torch.randn(1, 1, 4, 4, device=device)]
+        for i in range(3, self.log_size + 1):
+            for _ in range(2):
+                noises.append(torch.randn(1, 1, 2**i, 2**i, device=device))
+        return noises
+    def get_latent(self, x):
+        return self.style_mlp(x)
+    def mean_latent(self, num_latent):
+        latent_in = torch.randn(
+            num_latent, self.num_style_feat, device=self.constant_input.weight.device
+        )
+        latent = self.style_mlp(latent_in).mean(0, keepdim=True)
+        return latent
+    def forward(
+        self,
+        styles,
+        input_is_latent=False,
+        noise=None,
+        randomize_noise=True,
+        truncation=1,
+        truncation_latent=None,
+        inject_index=None,
+        return_latents=False,
+    ):
+        """Forward function for StyleGAN2Generator.
+        Args:
+            styles (list[Tensor]): Sample codes of styles.
+            input_is_latent (bool): Whether input is latent style.
+                Default: False.
+            noise (Tensor | None): Input noise or None. Default: None.
+            randomize_noise (bool): Randomize noise, used when 'noise' is
+                False. Default: True.
+            truncation (float): TODO. Default: 1.
+            truncation_latent (Tensor | None): TODO. Default: None.
+            inject_index (int | None): The injection index for mixing noise.
+                Default: None.
+            return_latents (bool): Whether to return style latents.
+                Default: False.
+        """
+        # style codes -> latents with Style MLP layer
+        if not input_is_latent:
+            styles = [self.style_mlp(s) for s in styles]
+        # noises
+        if noise is None:
+            if randomize_noise:
+                noise = [None] * self.num_layers  # for each style conv layer
+            else:  # use the stored noise
+                noise = [
+                    getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
+                ]
+        # style truncation
+        if truncation < 1:
+            style_truncation = []
+            for style in styles:
+                style_truncation.append(
+                    truncation_latent + truncation * (style - truncation_latent)
+                )
+            styles = style_truncation
+        # get style latent with injection
+        if len(styles) == 1:
+            inject_index = self.num_latent
+            if styles[0].ndim < 3:
+                # repeat latent code for all the layers
+                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            else:  # used for encoder with different latent code for each layer
+                latent = styles[0]
+        elif len(styles) == 2:  # mixing noises
+            if inject_index is None:
+                inject_index = random.randint(1, self.num_latent - 1)
+            latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            latent2 = (
+                styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
+            )
+            latent = torch.cat([latent1, latent2], 1)
+        # main generation
+        out = self.constant_input(latent.shape[0])
+        out = self.style_conv1(out, latent[:, 0], noise=noise[0])
+        skip = self.to_rgb1(out, latent[:, 1])
+        i = 1
+        for conv1, conv2, noise1, noise2, to_rgb in zip(
+            self.style_convs[::2],
+            self.style_convs[1::2],
+            noise[1::2],
+            noise[2::2],
+            self.to_rgbs,
+        ):
+            out = conv1(out, latent[:, i], noise=noise1)
+            out = conv2(out, latent[:, i + 1], noise=noise2)
+            skip = to_rgb(out, latent[:, i + 2], skip)
+            i += 2
+        image = skip
+        if return_latents:
+            return image, latent
+        else:
+            return image, None
+class ScaledLeakyReLU(nn.Module):
+    """Scaled LeakyReLU.
+    Args:
+        negative_slope (float): Negative slope. Default: 0.2.
+    """
+    def __init__(self, negative_slope=0.2):
+        super(ScaledLeakyReLU, self).__init__()
+        self.negative_slope = negative_slope
+    def forward(self, x):
+        out = F.leaky_relu(x, negative_slope=self.negative_slope)
+        return out * math.sqrt(2)
+class EqualConv2d(nn.Module):
+    """Equalized Linear as StyleGAN2.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Size of the convolving kernel.
+        stride (int): Stride of the convolution. Default: 1
+        padding (int): Zero-padding added to both sides of the input.
+            Default: 0.
+        bias (bool): If ``True``, adds a learnable bias to the output.
+            Default: ``True``.
+        bias_init_val (float): Bias initialized value. Default: 0.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        bias=True,
+        bias_init_val=0,
+    ):
+        super(EqualConv2d, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = padding
+        self.scale = 1 / math.sqrt(in_channels * kernel_size**2)
+        self.weight = nn.Parameter(
+            torch.randn(out_channels, in_channels, kernel_size, kernel_size)
+        )
+        if bias:
+            self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val))
+        else:
+            self.register_parameter("bias", None)
+    def forward(self, x):
+        out = F.conv2d(
+            x,
+            self.weight * self.scale,
+            bias=self.bias,
+            stride=self.stride,
+            padding=self.padding,
+        )
+        return out
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}(in_channels={self.in_channels}, "
+            f"out_channels={self.out_channels}, "
+            f"kernel_size={self.kernel_size},"
+            f" stride={self.stride}, padding={self.padding}, "
+            f"bias={self.bias is not None})"
+        )
+class ConvLayer(nn.Sequential):
+    """Conv Layer used in StyleGAN2 Discriminator.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Kernel size.
+        downsample (bool): Whether downsample by a factor of 2.
+            Default: False.
+        resample_kernel (list[int]): A list indicating the 1D resample
+            kernel magnitude. A cross production will be applied to
+            extent 1D resample kernel to 2D resample kernel.
+            Default: (1, 3, 3, 1).
+        bias (bool): Whether with bias. Default: True.
+        activate (bool): Whether use activateion. Default: True.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        downsample=False,
+        resample_kernel=(1, 3, 3, 1),
+        bias=True,
+        activate=True,
+    ):
+        layers = []
+        # downsample
+        if downsample:
+            layers.append(
+                UpFirDnSmooth(
+                    resample_kernel,
+                    upsample_factor=1,
+                    downsample_factor=2,
+                    kernel_size=kernel_size,
+                )
+            )
+            stride = 2
+            self.padding = 0
+        else:
+            stride = 1
+            self.padding = kernel_size // 2
+        # conv
+        layers.append(
+            EqualConv2d(
+                in_channels,
+                out_channels,
+                kernel_size,
+                stride=stride,
+                padding=self.padding,
+                bias=bias and not activate,
+            )
+        )
+        # activation
+        if activate:
+            if bias:
+                layers.append(FusedLeakyReLU(out_channels))
+            else:
+                layers.append(ScaledLeakyReLU(0.2))
+        super(ConvLayer, self).__init__(*layers)
+class ResBlock(nn.Module):
+    """Residual block used in StyleGAN2 Discriminator.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        resample_kernel (list[int]): A list indicating the 1D resample
+            kernel magnitude. A cross production will be applied to
+            extent 1D resample kernel to 2D resample kernel.
+            Default: (1, 3, 3, 1).
+    """
+    def __init__(self, in_channels, out_channels, resample_kernel=(1, 3, 3, 1)):
+        super(ResBlock, self).__init__()
+        self.conv1 = ConvLayer(in_channels, in_channels, 3, bias=True, activate=True)
+        self.conv2 = ConvLayer(
+            in_channels,
+            out_channels,
+            3,
+            downsample=True,
+            resample_kernel=resample_kernel,
+            bias=True,
+            activate=True,
+        )
+        self.skip = ConvLayer(
+            in_channels,
+            out_channels,
+            1,
+            downsample=True,
+            resample_kernel=resample_kernel,
+            bias=False,
+            activate=False,
+        )
+    def forward(self, x):
+        out = self.conv1(x)
+        out = self.conv2(out)
+        skip = self.skip(x)
+        out = (out + skip) / math.sqrt(2)
+        return out

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/stylegan2_bilinear_arch.py ADDED Viewed

	@@ -0,0 +1,709 @@

+# pylint: skip-file
+# type: ignore
+import math
+import random
+import torch
+from torch import nn
+from torch.nn import functional as F
+from .fused_act import FusedLeakyReLU, fused_leaky_relu
+class NormStyleCode(nn.Module):
+    def forward(self, x):
+        """Normalize the style codes.
+        Args:
+            x (Tensor): Style codes with shape (b, c).
+        Returns:
+            Tensor: Normalized tensor.
+        """
+        return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8)
+class EqualLinear(nn.Module):
+    """Equalized Linear as StyleGAN2.
+    Args:
+        in_channels (int): Size of each sample.
+        out_channels (int): Size of each output sample.
+        bias (bool): If set to ``False``, the layer will not learn an additive
+            bias. Default: ``True``.
+        bias_init_val (float): Bias initialized value. Default: 0.
+        lr_mul (float): Learning rate multiplier. Default: 1.
+        activation (None | str): The activation after ``linear`` operation.
+            Supported: 'fused_lrelu', None. Default: None.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        bias=True,
+        bias_init_val=0,
+        lr_mul=1,
+        activation=None,
+    ):
+        super(EqualLinear, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.lr_mul = lr_mul
+        self.activation = activation
+        if self.activation not in ["fused_lrelu", None]:
+            raise ValueError(
+                f"Wrong activation value in EqualLinear: {activation}"
+                "Supported ones are: ['fused_lrelu', None]."
+            )
+        self.scale = (1 / math.sqrt(in_channels)) * lr_mul
+        self.weight = nn.Parameter(torch.randn(out_channels, in_channels).div_(lr_mul))
+        if bias:
+            self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val))
+        else:
+            self.register_parameter("bias", None)
+    def forward(self, x):
+        if self.bias is None:
+            bias = None
+        else:
+            bias = self.bias * self.lr_mul
+        if self.activation == "fused_lrelu":
+            out = F.linear(x, self.weight * self.scale)
+            out = fused_leaky_relu(out, bias)
+        else:
+            out = F.linear(x, self.weight * self.scale, bias=bias)
+        return out
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}(in_channels={self.in_channels}, "
+            f"out_channels={self.out_channels}, bias={self.bias is not None})"
+        )
+class ModulatedConv2d(nn.Module):
+    """Modulated Conv2d used in StyleGAN2.
+    There is no bias in ModulatedConv2d.
+    A per-sample style vector rescales the input channels of a shared kernel;
+    the per-sample kernels are then applied with one grouped convolution.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Size of the convolving kernel.
+        num_style_feat (int): Channel number of style features.
+        demodulate (bool): Whether to demodulate in the conv layer.
+            Default: True.
+        sample_mode (str | None): Indicating 'upsample', 'downsample' or None.
+            Default: None.
+        eps (float): A value added to the denominator for numerical stability.
+            Default: 1e-8.
+        interpolation_mode (str): ``mode`` passed to ``F.interpolate`` when
+            ``sample_mode`` requests resampling. Default: 'bilinear'.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        num_style_feat,
+        demodulate=True,
+        sample_mode=None,
+        eps=1e-8,
+        interpolation_mode="bilinear",
+    ):
+        super(ModulatedConv2d, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.demodulate = demodulate
+        self.sample_mode = sample_mode
+        self.eps = eps
+        self.interpolation_mode = interpolation_mode
+        # align_corners is None for 'nearest' and False for every other mode.
+        # NOTE(review): presumably because F.interpolate only accepts
+        # align_corners for the interpolating modes - confirm against the docs.
+        if self.interpolation_mode == "nearest":
+            self.align_corners = None
+        else:
+            self.align_corners = False
+        # Weight scale 1 / sqrt(fan_in); applied in forward, not at init.
+        self.scale = 1 / math.sqrt(in_channels * kernel_size**2)
+        # modulation inside each modulated conv: maps the style vector to one
+        # multiplier per input channel (bias initialised to 1)
+        self.modulation = EqualLinear(
+            num_style_feat,
+            in_channels,
+            bias=True,
+            bias_init_val=1,
+            lr_mul=1,
+            activation=None,
+        )
+        # Shared kernel with a leading singleton dim to broadcast over batch.
+        self.weight = nn.Parameter(
+            torch.randn(1, out_channels, in_channels, kernel_size, kernel_size)
+        )
+        # Padding of kernel_size // 2 on each side.
+        self.padding = kernel_size // 2
+    def forward(self, x, style):
+        """Forward function.
+        Args:
+            x (Tensor): Tensor with shape (b, c, h, w).
+            style (Tensor): Tensor with shape (b, num_style_feat).
+        Returns:
+            Tensor: Modulated tensor after convolution, reshaped to
+                (b, out_channels, h_out, w_out).
+        """
+        b, c, h, w = x.shape  # c = c_in
+        # weight modulation
+        style = self.modulation(style).view(b, 1, c, 1, 1)
+        # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1)
+        weight = self.scale * self.weight * style  # (b, c_out, c_in, k, k)
+        if self.demodulate:
+            # Divide each output filter by its L2 norm over (c_in, k, k).
+            demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps)
+            weight = weight * demod.view(b, self.out_channels, 1, 1, 1)
+        # Fold the batch into the output-channel axis for the grouped conv.
+        weight = weight.view(
+            b * self.out_channels, c, self.kernel_size, self.kernel_size
+        )
+        # Resample the input (not the output) before convolving.
+        if self.sample_mode == "upsample":
+            x = F.interpolate(
+                x,
+                scale_factor=2,
+                mode=self.interpolation_mode,
+                align_corners=self.align_corners,
+            )
+        elif self.sample_mode == "downsample":
+            x = F.interpolate(
+                x,
+                scale_factor=0.5,
+                mode=self.interpolation_mode,
+                align_corners=self.align_corners,
+            )
+        # Re-read the shape: h and w may have changed after resampling.
+        b, c, h, w = x.shape
+        # Fold the batch into the channel axis so groups=b gives every sample
+        # its own modulated kernel.
+        x = x.view(1, b * c, h, w)
+        # weight: (b*c_out, c_in, k, k), groups=b
+        out = F.conv2d(x, weight, padding=self.padding, groups=b)
+        # Unfold the batch axis again.
+        out = out.view(b, self.out_channels, *out.shape[2:4])
+        return out
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}(in_channels={self.in_channels}, "
+            f"out_channels={self.out_channels}, "
+            f"kernel_size={self.kernel_size}, "
+            f"demodulate={self.demodulate}, sample_mode={self.sample_mode})"
+        )
+class StyleConv(nn.Module):
+    """Style conv.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Size of the convolving kernel.
+        num_style_feat (int): Channel number of style features.
+        demodulate (bool): Whether demodulate in the conv layer. Default: True.
+        sample_mode (str | None): Indicating 'upsample', 'downsample' or None.
+            Default: None.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        num_style_feat,
+        demodulate=True,
+        sample_mode=None,
+        interpolation_mode="bilinear",
+    ):
+        super(StyleConv, self).__init__()
+        self.modulated_conv = ModulatedConv2d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            num_style_feat,
+            demodulate=demodulate,
+            sample_mode=sample_mode,
+            interpolation_mode=interpolation_mode,
+        )
+        self.weight = nn.Parameter(torch.zeros(1))  # for noise injection
+        self.activate = FusedLeakyReLU(out_channels)
+    def forward(self, x, style, noise=None):
+        # modulate
+        out = self.modulated_conv(x, style)
+        # noise injection
+        if noise is None:
+            b, _, h, w = out.shape
+            noise = out.new_empty(b, 1, h, w).normal_()
+        out = out + self.weight * noise
+        # activation (with bias)
+        out = self.activate(out)
+        return out
+class ToRGB(nn.Module):
+    """To RGB from features.
+    Args:
+        in_channels (int): Channel number of input.
+        num_style_feat (int): Channel number of style features.
+        upsample (bool): Whether to upsample. Default: True.
+    """
+    def __init__(
+        self, in_channels, num_style_feat, upsample=True, interpolation_mode="bilinear"
+    ):
+        super(ToRGB, self).__init__()
+        self.upsample = upsample
+        self.interpolation_mode = interpolation_mode
+        if self.interpolation_mode == "nearest":
+            self.align_corners = None
+        else:
+            self.align_corners = False
+        self.modulated_conv = ModulatedConv2d(
+            in_channels,
+            3,
+            kernel_size=1,
+            num_style_feat=num_style_feat,
+            demodulate=False,
+            sample_mode=None,
+            interpolation_mode=interpolation_mode,
+        )
+        self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))
+    def forward(self, x, style, skip=None):
+        """Forward function.
+        Args:
+            x (Tensor): Feature tensor with shape (b, c, h, w).
+            style (Tensor): Tensor with shape (b, num_style_feat).
+            skip (Tensor): Base/skip tensor. Default: None.
+        Returns:
+            Tensor: RGB images.
+        """
+        out = self.modulated_conv(x, style)
+        out = out + self.bias
+        if skip is not None:
+            if self.upsample:
+                skip = F.interpolate(
+                    skip,
+                    scale_factor=2,
+                    mode=self.interpolation_mode,
+                    align_corners=self.align_corners,
+                )
+            out = out + skip
+        return out
+class ConstantInput(nn.Module):
+    """Constant input.
+    Args:
+        num_channel (int): Channel number of constant input.
+        size (int): Spatial size of constant input.
+    """
+    def __init__(self, num_channel, size):
+        super(ConstantInput, self).__init__()
+        self.weight = nn.Parameter(torch.randn(1, num_channel, size, size))
+    def forward(self, batch):
+        out = self.weight.repeat(batch, 1, 1, 1)
+        return out
+class StyleGAN2GeneratorBilinear(nn.Module):
+    """StyleGAN2 Generator.
+    Args:
+        out_size (int): The spatial size of outputs.
+        num_style_feat (int): Channel number of style features. Default: 512.
+        num_mlp (int): Layer number of MLP style layers. Default: 8.
+        channel_multiplier (int): Channel multiplier for large networks of
+            StyleGAN2. Default: 2.
+        lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01.
+        narrow (float): Narrow ratio for channels. Default: 1.0.
+    """
+    def __init__(
+        self,
+        out_size,
+        num_style_feat=512,
+        num_mlp=8,
+        channel_multiplier=2,
+        lr_mlp=0.01,
+        narrow=1,
+        interpolation_mode="bilinear",
+    ):
+        super(StyleGAN2GeneratorBilinear, self).__init__()
+        # Style MLP layers
+        self.num_style_feat = num_style_feat
+        style_mlp_layers = [NormStyleCode()]
+        for i in range(num_mlp):
+            style_mlp_layers.append(
+                EqualLinear(
+                    num_style_feat,
+                    num_style_feat,
+                    bias=True,
+                    bias_init_val=0,
+                    lr_mul=lr_mlp,
+                    activation="fused_lrelu",
+                )
+            )
+        self.style_mlp = nn.Sequential(*style_mlp_layers)
+        channels = {
+            "4": int(512 * narrow),
+            "8": int(512 * narrow),
+            "16": int(512 * narrow),
+            "32": int(512 * narrow),
+            "64": int(256 * channel_multiplier * narrow),
+            "128": int(128 * channel_multiplier * narrow),
+            "256": int(64 * channel_multiplier * narrow),
+            "512": int(32 * channel_multiplier * narrow),
+            "1024": int(16 * channel_multiplier * narrow),
+        }
+        self.channels = channels
+        self.constant_input = ConstantInput(channels["4"], size=4)
+        self.style_conv1 = StyleConv(
+            channels["4"],
+            channels["4"],
+            kernel_size=3,
+            num_style_feat=num_style_feat,
+            demodulate=True,
+            sample_mode=None,
+            interpolation_mode=interpolation_mode,
+        )
+        self.to_rgb1 = ToRGB(
+            channels["4"],
+            num_style_feat,
+            upsample=False,
+            interpolation_mode=interpolation_mode,
+        )
+        self.log_size = int(math.log(out_size, 2))
+        self.num_layers = (self.log_size - 2) * 2 + 1
+        self.num_latent = self.log_size * 2 - 2
+        self.style_convs = nn.ModuleList()
+        self.to_rgbs = nn.ModuleList()
+        self.noises = nn.Module()
+        in_channels = channels["4"]
+        # noise
+        for layer_idx in range(self.num_layers):
+            resolution = 2 ** ((layer_idx + 5) // 2)
+            shape = [1, 1, resolution, resolution]
+            self.noises.register_buffer(f"noise{layer_idx}", torch.randn(*shape))
+        # style convs and to_rgbs
+        for i in range(3, self.log_size + 1):
+            out_channels = channels[f"{2**i}"]
+            self.style_convs.append(
+                StyleConv(
+                    in_channels,
+                    out_channels,
+                    kernel_size=3,
+                    num_style_feat=num_style_feat,
+                    demodulate=True,
+                    sample_mode="upsample",
+                    interpolation_mode=interpolation_mode,
+                )
+            )
+            self.style_convs.append(
+                StyleConv(
+                    out_channels,
+                    out_channels,
+                    kernel_size=3,
+                    num_style_feat=num_style_feat,
+                    demodulate=True,
+                    sample_mode=None,
+                    interpolation_mode=interpolation_mode,
+                )
+            )
+            self.to_rgbs.append(
+                ToRGB(
+                    out_channels,
+                    num_style_feat,
+                    upsample=True,
+                    interpolation_mode=interpolation_mode,
+                )
+            )
+            in_channels = out_channels
+    def make_noise(self):
+        """Make noise for noise injection."""
+        device = self.constant_input.weight.device
+        noises = [torch.randn(1, 1, 4, 4, device=device)]
+        for i in range(3, self.log_size + 1):
+            for _ in range(2):
+                noises.append(torch.randn(1, 1, 2**i, 2**i, device=device))
+        return noises
+    def get_latent(self, x):
+        return self.style_mlp(x)
+    def mean_latent(self, num_latent):
+        latent_in = torch.randn(
+            num_latent, self.num_style_feat, device=self.constant_input.weight.device
+        )
+        latent = self.style_mlp(latent_in).mean(0, keepdim=True)
+        return latent
+    def forward(
+        self,
+        styles,
+        input_is_latent=False,
+        noise=None,
+        randomize_noise=True,
+        truncation=1,
+        truncation_latent=None,
+        inject_index=None,
+        return_latents=False,
+    ):
+        """Forward function for StyleGAN2Generator.
+        Args:
+            styles (list[Tensor]): Sample codes of styles.
+            input_is_latent (bool): Whether input is latent style.
+                Default: False.
+            noise (Tensor | None): Input noise or None. Default: None.
+            randomize_noise (bool): Randomize noise, used when 'noise' is
+                False. Default: True.
+            truncation (float): TODO. Default: 1.
+            truncation_latent (Tensor | None): TODO. Default: None.
+            inject_index (int | None): The injection index for mixing noise.
+                Default: None.
+            return_latents (bool): Whether to return style latents.
+                Default: False.
+        """
+        # style codes -> latents with Style MLP layer
+        if not input_is_latent:
+            styles = [self.style_mlp(s) for s in styles]
+        # noises
+        if noise is None:
+            if randomize_noise:
+                noise = [None] * self.num_layers  # for each style conv layer
+            else:  # use the stored noise
+                noise = [
+                    getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
+                ]
+        # style truncation
+        if truncation < 1:
+            style_truncation = []
+            for style in styles:
+                style_truncation.append(
+                    truncation_latent + truncation * (style - truncation_latent)
+                )
+            styles = style_truncation
+        # get style latent with injection
+        if len(styles) == 1:
+            inject_index = self.num_latent
+            if styles[0].ndim < 3:
+                # repeat latent code for all the layers
+                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            else:  # used for encoder with different latent code for each layer
+                latent = styles[0]
+        elif len(styles) == 2:  # mixing noises
+            if inject_index is None:
+                inject_index = random.randint(1, self.num_latent - 1)
+            latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            latent2 = (
+                styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
+            )
+            latent = torch.cat([latent1, latent2], 1)
+        # main generation
+        out = self.constant_input(latent.shape[0])
+        out = self.style_conv1(out, latent[:, 0], noise=noise[0])
+        skip = self.to_rgb1(out, latent[:, 1])
+        i = 1
+        for conv1, conv2, noise1, noise2, to_rgb in zip(
+            self.style_convs[::2],
+            self.style_convs[1::2],
+            noise[1::2],
+            noise[2::2],
+            self.to_rgbs,
+        ):
+            out = conv1(out, latent[:, i], noise=noise1)
+            out = conv2(out, latent[:, i + 1], noise=noise2)
+            skip = to_rgb(out, latent[:, i + 2], skip)
+            i += 2
+        image = skip
+        if return_latents:
+            return image, latent
+        else:
+            return image, None
+class ScaledLeakyReLU(nn.Module):
+    """Scaled LeakyReLU.
+    Args:
+        negative_slope (float): Negative slope. Default: 0.2.
+    """
+    def __init__(self, negative_slope=0.2):
+        super(ScaledLeakyReLU, self).__init__()
+        self.negative_slope = negative_slope
+    def forward(self, x):
+        out = F.leaky_relu(x, negative_slope=self.negative_slope)
+        return out * math.sqrt(2)
+class EqualConv2d(nn.Module):
+    """Equalized Linear as StyleGAN2.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Size of the convolving kernel.
+        stride (int): Stride of the convolution. Default: 1
+        padding (int): Zero-padding added to both sides of the input.
+            Default: 0.
+        bias (bool): If ``True``, adds a learnable bias to the output.
+            Default: ``True``.
+        bias_init_val (float): Bias initialized value. Default: 0.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        bias=True,
+        bias_init_val=0,
+    ):
+        super(EqualConv2d, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = padding
+        self.scale = 1 / math.sqrt(in_channels * kernel_size**2)
+        self.weight = nn.Parameter(
+            torch.randn(out_channels, in_channels, kernel_size, kernel_size)
+        )
+        if bias:
+            self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val))
+        else:
+            self.register_parameter("bias", None)
+    def forward(self, x):
+        out = F.conv2d(
+            x,
+            self.weight * self.scale,
+            bias=self.bias,
+            stride=self.stride,
+            padding=self.padding,
+        )
+        return out
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}(in_channels={self.in_channels}, "
+            f"out_channels={self.out_channels}, "
+            f"kernel_size={self.kernel_size},"
+            f" stride={self.stride}, padding={self.padding}, "
+            f"bias={self.bias is not None})"
+        )
+class ConvLayer(nn.Sequential):
+    """Conv Layer used in StyleGAN2 Discriminator.
+    Builds, in order: an optional 0.5x resampling layer, an ``EqualConv2d``
+    and an optional activation.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Kernel size.
+        downsample (bool): Whether downsample by a factor of 2.
+            Default: False.
+        bias (bool): Whether with bias. Default: True.
+        activate (bool): Whether use activation. Default: True.
+        interpolation_mode (str): Mode of the resampling layer used when
+            ``downsample`` is True. Default: 'bilinear'.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        downsample=False,
+        bias=True,
+        activate=True,
+        interpolation_mode="bilinear",
+    ):
+        layers = []
+        # NOTE: plain attributes are assigned before nn.Sequential.__init__
+        # runs (it is called last, once the layer list is complete).
+        self.interpolation_mode = interpolation_mode
+        # downsample: done by resampling with scale 0.5, not by a strided conv
+        if downsample:
+            # align_corners is None for 'nearest' and False otherwise; it is
+            # only set on this instance when downsample is True.
+            if self.interpolation_mode == "nearest":
+                self.align_corners = None
+            else:
+                self.align_corners = False
+            layers.append(
+                torch.nn.Upsample(
+                    scale_factor=0.5,
+                    mode=interpolation_mode,
+                    align_corners=self.align_corners,
+                )
+            )
+        # The convolution itself always uses stride 1.
+        stride = 1
+        self.padding = kernel_size // 2
+        # conv: it only carries its own bias when no activation follows
+        layers.append(
+            EqualConv2d(
+                in_channels,
+                out_channels,
+                kernel_size,
+                stride=stride,
+                padding=self.padding,
+                bias=bias and not activate,
+            )
+        )
+        # activation
+        # NOTE(review): FusedLeakyReLU presumably supplies the bias term in
+        # the bias + activate case - confirm against its definition.
+        if activate:
+            if bias:
+                layers.append(FusedLeakyReLU(out_channels))
+            else:
+                layers.append(ScaledLeakyReLU(0.2))
+        super(ConvLayer, self).__init__(*layers)
+class ResBlock(nn.Module):
+    """Residual block used in StyleGAN2 Discriminator.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+    """
+    def __init__(self, in_channels, out_channels, interpolation_mode="bilinear"):
+        super(ResBlock, self).__init__()
+        self.conv1 = ConvLayer(in_channels, in_channels, 3, bias=True, activate=True)
+        self.conv2 = ConvLayer(
+            in_channels,
+            out_channels,
+            3,
+            downsample=True,
+            interpolation_mode=interpolation_mode,
+            bias=True,
+            activate=True,
+        )
+        self.skip = ConvLayer(
+            in_channels,
+            out_channels,
+            1,
+            downsample=True,
+            interpolation_mode=interpolation_mode,
+            bias=False,
+            activate=False,
+        )
+    def forward(self, x):
+        out = self.conv1(x)
+        out = self.conv2(out)
+        skip = self.skip(x)
+        out = (out + skip) / math.sqrt(2)
+        return out

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/stylegan2_clean_arch.py ADDED Viewed

	@@ -0,0 +1,453 @@

+# pylint: skip-file
+# type: ignore
+import math
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torch.nn import init
+from torch.nn.modules.batchnorm import _BatchNorm
+@torch.no_grad()
+def default_init_weights(module_list, scale=1, bias_fill=0, **kwargs):
+    """Initialize network weights.
+    Args:
+        module_list (list[nn.Module] | nn.Module): Modules to be initialized.
+        scale (float): Scale initialized weights, especially for residual
+            blocks. Default: 1.
+        bias_fill (float): The value to fill bias. Default: 0
+        kwargs (dict): Other arguments for initialization function.
+    """
+    if not isinstance(module_list, list):
+        module_list = [module_list]
+    for module in module_list:
+        for m in module.modules():
+            if isinstance(m, nn.Conv2d):
+                init.kaiming_normal_(m.weight, **kwargs)
+                m.weight.data *= scale
+                if m.bias is not None:
+                    m.bias.data.fill_(bias_fill)
+            elif isinstance(m, nn.Linear):
+                init.kaiming_normal_(m.weight, **kwargs)
+                m.weight.data *= scale
+                if m.bias is not None:
+                    m.bias.data.fill_(bias_fill)
+            elif isinstance(m, _BatchNorm):
+                init.constant_(m.weight, 1)
+                if m.bias is not None:
+                    m.bias.data.fill_(bias_fill)
+class NormStyleCode(nn.Module):
+    def forward(self, x):
+        """Normalize the style codes.
+        Args:
+            x (Tensor): Style codes with shape (b, c).
+        Returns:
+            Tensor: Normalized tensor.
+        """
+        return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8)
+class ModulatedConv2d(nn.Module):
+    """Modulated Conv2d used in StyleGAN2.
+    There is no bias in ModulatedConv2d.
+    A per-sample style vector rescales the input channels of a shared kernel;
+    the per-sample kernels are then applied with one grouped convolution.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Size of the convolving kernel.
+        num_style_feat (int): Channel number of style features.
+        demodulate (bool): Whether to demodulate in the conv layer. Default: True.
+        sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None.
+        eps (float): A value added to the denominator for numerical stability. Default: 1e-8.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        num_style_feat,
+        demodulate=True,
+        sample_mode=None,
+        eps=1e-8,
+    ):
+        super(ModulatedConv2d, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.demodulate = demodulate
+        self.sample_mode = sample_mode
+        self.eps = eps
+        # modulation inside each modulated conv: a plain nn.Linear mapping the
+        # style vector to one multiplier per input channel
+        self.modulation = nn.Linear(num_style_feat, in_channels, bias=True)
+        # initialization: Kaiming-normal weight, bias filled with 1
+        default_init_weights(
+            self.modulation,
+            scale=1,
+            bias_fill=1,
+            a=0,
+            mode="fan_in",
+            nonlinearity="linear",
+        )
+        # The 1 / sqrt(fan_in) factor is baked into the stored weight here,
+        # so forward applies no extra scale.
+        self.weight = nn.Parameter(
+            torch.randn(1, out_channels, in_channels, kernel_size, kernel_size)
+            / math.sqrt(in_channels * kernel_size**2)
+        )
+        # Padding of kernel_size // 2 on each side.
+        self.padding = kernel_size // 2
+    def forward(self, x, style):
+        """Forward function.
+        Args:
+            x (Tensor): Tensor with shape (b, c, h, w).
+            style (Tensor): Tensor with shape (b, num_style_feat).
+        Returns:
+            Tensor: Modulated tensor after convolution, reshaped to
+                (b, out_channels, h_out, w_out).
+        """
+        b, c, h, w = x.shape  # c = c_in
+        # weight modulation
+        style = self.modulation(style).view(b, 1, c, 1, 1)
+        # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1)
+        weight = self.weight * style  # (b, c_out, c_in, k, k)
+        if self.demodulate:
+            # Divide each output filter by its L2 norm over (c_in, k, k).
+            demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps)
+            weight = weight * demod.view(b, self.out_channels, 1, 1, 1)
+        # Fold the batch into the output-channel axis for the grouped conv.
+        weight = weight.view(
+            b * self.out_channels, c, self.kernel_size, self.kernel_size
+        )
+        # upsample or downsample if necessary (the input is resampled before
+        # the convolution; the mode is fixed to bilinear in this variant)
+        if self.sample_mode == "upsample":
+            x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False)
+        elif self.sample_mode == "downsample":
+            x = F.interpolate(x, scale_factor=0.5, mode="bilinear", align_corners=False)
+        # Re-read the shape: h and w may have changed after resampling.
+        b, c, h, w = x.shape
+        # Fold the batch into the channel axis so groups=b gives every sample
+        # its own modulated kernel.
+        x = x.view(1, b * c, h, w)
+        # weight: (b*c_out, c_in, k, k), groups=b
+        out = F.conv2d(x, weight, padding=self.padding, groups=b)
+        # Unfold the batch axis again.
+        out = out.view(b, self.out_channels, *out.shape[2:4])
+        return out
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}(in_channels={self.in_channels}, out_channels={self.out_channels}, "
+            f"kernel_size={self.kernel_size}, demodulate={self.demodulate}, sample_mode={self.sample_mode})"
+        )
+class StyleConv(nn.Module):
+    """Style conv used in StyleGAN2.
+    Args:
+        in_channels (int): Channel number of the input.
+        out_channels (int): Channel number of the output.
+        kernel_size (int): Size of the convolving kernel.
+        num_style_feat (int): Channel number of style features.
+        demodulate (bool): Whether demodulate in the conv layer. Default: True.
+        sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        num_style_feat,
+        demodulate=True,
+        sample_mode=None,
+    ):
+        super(StyleConv, self).__init__()
+        self.modulated_conv = ModulatedConv2d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            num_style_feat,
+            demodulate=demodulate,
+            sample_mode=sample_mode,
+        )
+        self.weight = nn.Parameter(torch.zeros(1))  # for noise injection
+        self.bias = nn.Parameter(torch.zeros(1, out_channels, 1, 1))
+        self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+    def forward(self, x, style, noise=None):
+        # modulate
+        out = self.modulated_conv(x, style) * 2**0.5  # for conversion
+        # noise injection
+        if noise is None:
+            b, _, h, w = out.shape
+            noise = out.new_empty(b, 1, h, w).normal_()
+        out = out + self.weight * noise
+        # add bias
+        out = out + self.bias
+        # activation
+        out = self.activate(out)
+        return out
+class ToRGB(nn.Module):
+    """To RGB (image space) from features.
+    Args:
+        in_channels (int): Channel number of input.
+        num_style_feat (int): Channel number of style features.
+        upsample (bool): Whether to upsample. Default: True.
+    """
+    def __init__(self, in_channels, num_style_feat, upsample=True):
+        super(ToRGB, self).__init__()
+        self.upsample = upsample
+        self.modulated_conv = ModulatedConv2d(
+            in_channels,
+            3,
+            kernel_size=1,
+            num_style_feat=num_style_feat,
+            demodulate=False,
+            sample_mode=None,
+        )
+        self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))
+    def forward(self, x, style, skip=None):
+        """Forward function.
+        Args:
+            x (Tensor): Feature tensor with shape (b, c, h, w).
+            style (Tensor): Tensor with shape (b, num_style_feat).
+            skip (Tensor): Base/skip tensor. Default: None.
+        Returns:
+            Tensor: RGB images.
+        """
+        out = self.modulated_conv(x, style)
+        out = out + self.bias
+        if skip is not None:
+            if self.upsample:
+                skip = F.interpolate(
+                    skip, scale_factor=2, mode="bilinear", align_corners=False
+                )
+            out = out + skip
+        return out
+class ConstantInput(nn.Module):
+    """Constant input.
+    Args:
+        num_channel (int): Channel number of constant input.
+        size (int): Spatial size of constant input.
+    """
+    def __init__(self, num_channel, size):
+        super(ConstantInput, self).__init__()
+        self.weight = nn.Parameter(torch.randn(1, num_channel, size, size))
+    def forward(self, batch):
+        out = self.weight.repeat(batch, 1, 1, 1)
+        return out
+class StyleGAN2GeneratorClean(nn.Module):
+    """Clean version of StyleGAN2 Generator.
+    Args:
+        out_size (int): The spatial size of outputs.
+        num_style_feat (int): Channel number of style features. Default: 512.
+        num_mlp (int): Layer number of MLP style layers. Default: 8.
+        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
+        narrow (float): Narrow ratio for channels. Default: 1.0.
+    """
+    def __init__(
+        self, out_size, num_style_feat=512, num_mlp=8, channel_multiplier=2, narrow=1
+    ):
+        super(StyleGAN2GeneratorClean, self).__init__()
+        # Style MLP layers
+        self.num_style_feat = num_style_feat
+        style_mlp_layers = [NormStyleCode()]
+        for _ in range(num_mlp):
+            style_mlp_layers.extend(
+                [
+                    nn.Linear(num_style_feat, num_style_feat, bias=True),
+                    nn.LeakyReLU(negative_slope=0.2, inplace=True),
+                ]
+            )
+        self.style_mlp = nn.Sequential(*style_mlp_layers)
+        # initialization
+        default_init_weights(
+            self.style_mlp,
+            scale=1,
+            bias_fill=0,
+            a=0.2,
+            mode="fan_in",
+            nonlinearity="leaky_relu",
+        )
+        # channel list, keyed by the spatial resolution (as a string)
+        channels = {
+            "4": int(512 * narrow),
+            "8": int(512 * narrow),
+            "16": int(512 * narrow),
+            "32": int(512 * narrow),
+            "64": int(256 * channel_multiplier * narrow),
+            "128": int(128 * channel_multiplier * narrow),
+            "256": int(64 * channel_multiplier * narrow),
+            "512": int(32 * channel_multiplier * narrow),
+            "1024": int(16 * channel_multiplier * narrow),
+        }
+        self.channels = channels
+        self.constant_input = ConstantInput(channels["4"], size=4)
+        self.style_conv1 = StyleConv(
+            channels["4"],
+            channels["4"],
+            kernel_size=3,
+            num_style_feat=num_style_feat,
+            demodulate=True,
+            sample_mode=None,
+        )
+        self.to_rgb1 = ToRGB(channels["4"], num_style_feat, upsample=False)
+        self.log_size = int(math.log(out_size, 2))
+        # one style conv at 4x4 plus two per resolution from 8x8 up to out_size
+        self.num_layers = (self.log_size - 2) * 2 + 1
+        self.num_latent = self.log_size * 2 - 2
+        self.style_convs = nn.ModuleList()
+        self.to_rgbs = nn.ModuleList()
+        self.noises = nn.Module()
+        in_channels = channels["4"]
+        # noise: layer 0 is 4x4, then each following pair of layers doubles the resolution
+        for layer_idx in range(self.num_layers):
+            resolution = 2 ** ((layer_idx + 5) // 2)
+            shape = [1, 1, resolution, resolution]
+            self.noises.register_buffer(f"noise{layer_idx}", torch.randn(*shape))
+        # style convs and to_rgbs
+        for i in range(3, self.log_size + 1):
+            out_channels = channels[f"{2**i}"]
+            self.style_convs.append(
+                StyleConv(
+                    in_channels,
+                    out_channels,
+                    kernel_size=3,
+                    num_style_feat=num_style_feat,
+                    demodulate=True,
+                    sample_mode="upsample",
+                )
+            )
+            self.style_convs.append(
+                StyleConv(
+                    out_channels,
+                    out_channels,
+                    kernel_size=3,
+                    num_style_feat=num_style_feat,
+                    demodulate=True,
+                    sample_mode=None,
+                )
+            )
+            self.to_rgbs.append(ToRGB(out_channels, num_style_feat, upsample=True))
+            in_channels = out_channels
+    def make_noise(self):
+        """Make noise for noise injection.
+        Returns:
+            list[Tensor]: One (1, 1, 4, 4) tensor followed by two
+                (1, 1, 2**i, 2**i) tensors for every i in [3, log_size], all on
+                the device of the constant input.
+        """
+        device = self.constant_input.weight.device
+        noises = [torch.randn(1, 1, 4, 4, device=device)]
+        for i in range(3, self.log_size + 1):
+            for _ in range(2):
+                noises.append(torch.randn(1, 1, 2**i, 2**i, device=device))
+        return noises
+    def get_latent(self, x):
+        """Map style codes ``x`` to latents with the style MLP."""
+        return self.style_mlp(x)
+    def mean_latent(self, num_latent):
+        """Average the style-MLP output over ``num_latent`` random style codes.
+        Returns:
+            Tensor: The mean latent, with the averaged dimension kept (size 1).
+        """
+        latent_in = torch.randn(
+            num_latent, self.num_style_feat, device=self.constant_input.weight.device
+        )
+        latent = self.style_mlp(latent_in).mean(0, keepdim=True)
+        return latent
+    def forward(
+        self,
+        styles,
+        input_is_latent=False,
+        noise=None,
+        randomize_noise=True,
+        truncation=1,
+        truncation_latent=None,
+        inject_index=None,
+        return_latents=False,
+    ):
+        """Forward function for StyleGAN2GeneratorClean.
+        Args:
+            styles (list[Tensor]): Sample codes of styles. Must hold one code, or two for style mixing.
+            input_is_latent (bool): Whether input is latent style. Default: False.
+            noise (Tensor | None): Input noise or None. Default: None.
+            randomize_noise (bool): Randomize noise, used when 'noise' is None. Default: True.
+            truncation (float): The truncation ratio. Default: 1.
+            truncation_latent (Tensor | None): The truncation latent tensor. Required when
+                truncation < 1. Default: None.
+            inject_index (int | None): The injection index for mixing noise. Default: None.
+            return_latents (bool): Whether to return style latents. Default: False.
+        Returns:
+            tuple: ``(image, latent)`` when ``return_latents`` is True, otherwise ``(image, None)``.
+        Raises:
+            ValueError: If ``styles`` does not hold 1 or 2 codes, or if
+                ``truncation < 1`` is requested without ``truncation_latent``.
+        """
+        # Fail early with a clear message; previously these cases surfaced later
+        # as an UnboundLocalError on ``latent`` or a TypeError on ``None + Tensor``.
+        if len(styles) not in (1, 2):
+            raise ValueError(
+                f"styles must contain 1 or 2 style codes, got {len(styles)}"
+            )
+        if truncation < 1 and truncation_latent is None:
+            raise ValueError("truncation_latent is required when truncation < 1")
+        # style codes -> latents with Style MLP layer
+        if not input_is_latent:
+            styles = [self.style_mlp(s) for s in styles]
+        # noises
+        if noise is None:
+            if randomize_noise:
+                noise = [None] * self.num_layers  # for each style conv layer
+            else:  # use the stored noise
+                noise = [
+                    getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
+                ]
+        # style truncation: pull every style towards truncation_latent
+        if truncation < 1:
+            styles = [
+                truncation_latent + truncation * (style - truncation_latent)
+                for style in styles
+            ]
+        # get style latents with injection
+        if len(styles) == 1:
+            inject_index = self.num_latent
+            if styles[0].ndim < 3:
+                # repeat latent code for all the layers
+                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            else:  # used for encoder with different latent code for each layer
+                latent = styles[0]
+        else:  # two styles: mixing noises
+            if inject_index is None:
+                inject_index = random.randint(1, self.num_latent - 1)
+            latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+            latent2 = (
+                styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
+            )
+            latent = torch.cat([latent1, latent2], 1)
+        # main generation
+        out = self.constant_input(latent.shape[0])
+        out = self.style_conv1(out, latent[:, 0], noise=noise[0])
+        skip = self.to_rgb1(out, latent[:, 1])
+        i = 1
+        # each resolution step consumes two style convs, two noises and one to_rgb
+        for conv1, conv2, noise1, noise2, to_rgb in zip(
+            self.style_convs[::2],
+            self.style_convs[1::2],
+            noise[1::2],
+            noise[2::2],
+            self.to_rgbs,
+        ):
+            out = conv1(out, latent[:, i], noise=noise1)
+            out = conv2(out, latent[:, i + 1], noise=noise2)
+            skip = to_rgb(out, latent[:, i + 2], skip)  # feature back to the rgb space
+            i += 2
+        image = skip
+        if return_latents:
+            return image, latent
+        return image, None

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/face/upfirdn2d.py ADDED Viewed

	@@ -0,0 +1,194 @@

+# pylint: skip-file
+# type: ignore
+# modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py  # noqa:E501
+import os
+import torch
+from torch.autograd import Function
+from torch.nn import functional as F
+upfirdn2d_ext = None
+class UpFirDn2dBackward(Function):
+    """Autograd function that computes the input gradient of ``UpFirDn2d``.
+    It is a ``Function`` of its own so that the gradient computation has a
+    ``backward`` as well (second-order gradients).
+    NOTE: both methods call ``upfirdn2d_ext.upfirdn2d``. This module assigns
+    ``upfirdn2d_ext = None``, so unless that name is rebound to a compiled
+    extension the calls raise ``AttributeError``.
+    """
+    @staticmethod
+    def forward(
+        ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, in_size, out_size
+    ):
+        """Compute the gradient with respect to the input of ``UpFirDn2d``.
+        Args:
+            ctx: Autograd context; receives the kernel and the resampling settings.
+            grad_output (Tensor): Gradient with respect to the ``UpFirDn2d`` output.
+            kernel (Tensor): FIR kernel, saved for ``backward``.
+            grad_kernel (Tensor): Kernel handed to the extension for this pass.
+            up (tuple): ``(up_x, up_y)``.
+            down (tuple): ``(down_x, down_y)``.
+            pad (tuple): ``(pad_x0, pad_x1, pad_y0, pad_y1)`` of the forward pass.
+            g_pad (tuple): ``(g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1)`` used here.
+            in_size: 4-element shape of the original input.
+            out_size: ``(out_h, out_w)`` of the forward output.
+        Returns:
+            Tensor: Gradient viewed with the shape given by ``in_size``.
+        """
+        up_x, up_y = up
+        down_x, down_y = down
+        g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad
+        # Fold all leading dims into one and add a trailing singleton axis.
+        grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)
+        # The down factors are passed where UpFirDn2d.forward passes the up
+        # factors (and vice versa), together with the gradient padding.
+        grad_input = upfirdn2d_ext.upfirdn2d(
+            grad_output,
+            grad_kernel,
+            down_x,
+            down_y,
+            up_x,
+            up_y,
+            g_pad_x0,
+            g_pad_x1,
+            g_pad_y0,
+            g_pad_y1,
+        )
+        grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3])
+        # Keep everything ``backward`` needs to re-run the forward resampling.
+        ctx.save_for_backward(kernel)
+        pad_x0, pad_x1, pad_y0, pad_y1 = pad
+        ctx.up_x = up_x
+        ctx.up_y = up_y
+        ctx.down_x = down_x
+        ctx.down_y = down_y
+        ctx.pad_x0 = pad_x0
+        ctx.pad_x1 = pad_x1
+        ctx.pad_y0 = pad_y0
+        ctx.pad_y1 = pad_y1
+        ctx.in_size = in_size
+        ctx.out_size = out_size
+        return grad_input
+    @staticmethod
+    def backward(ctx, gradgrad_input):
+        """Propagate a gradient through the gradient computation above.
+        Applies the extension with the saved kernel and the original
+        up/down/pad settings to ``gradgrad_input``.
+        Returns:
+            tuple: The gradient for ``grad_output`` followed by ``None`` for the
+                eight remaining ``forward`` arguments.
+        """
+        (kernel,) = ctx.saved_tensors
+        gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1)
+        gradgrad_out = upfirdn2d_ext.upfirdn2d(
+            gradgrad_input,
+            kernel,
+            ctx.up_x,
+            ctx.up_y,
+            ctx.down_x,
+            ctx.down_y,
+            ctx.pad_x0,
+            ctx.pad_x1,
+            ctx.pad_y0,
+            ctx.pad_y1,
+        )
+        # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0],
+        #                                  ctx.out_size[1], ctx.in_size[3])
+        gradgrad_out = gradgrad_out.view(
+            ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1]
+        )
+        return gradgrad_out, None, None, None, None, None, None, None, None
+class UpFirDn2d(Function):
+    """Autograd function wrapping the ``upfirdn2d_ext.upfirdn2d`` extension call.
+    NOTE: this module assigns ``upfirdn2d_ext = None``; unless that name is
+    rebound to a compiled extension, ``forward`` raises ``AttributeError``.
+    """
+    @staticmethod
+    def forward(ctx, input, kernel, up, down, pad):
+        """Run the extension on ``input`` and record what ``backward`` needs.
+        Args:
+            ctx: Autograd context.
+            input (Tensor): 4-D tensor, unpacked as ``(_, channel, in_h, in_w)``.
+            kernel (Tensor): 2-D FIR kernel, unpacked as ``(kernel_h, kernel_w)``.
+            up (tuple): ``(up_x, up_y)``.
+            down (tuple): ``(down_x, down_y)``.
+            pad (tuple): ``(pad_x0, pad_x1, pad_y0, pad_y1)``.
+        Returns:
+            Tensor: Result viewed as ``(-1, channel, out_h, out_w)``.
+        """
+        up_x, up_y = up
+        down_x, down_y = down
+        pad_x0, pad_x1, pad_y0, pad_y1 = pad
+        kernel_h, kernel_w = kernel.shape
+        _, channel, in_h, in_w = input.shape
+        ctx.in_size = input.shape
+        # Fold batch and channel into one axis and add a trailing singleton axis.
+        input = input.reshape(-1, in_h, in_w, 1)
+        # Save the kernel and its flipped copy; backward passes the flipped
+        # one to UpFirDn2dBackward as ``grad_kernel``.
+        ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))
+        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
+        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
+        ctx.out_size = (out_h, out_w)
+        ctx.up = (up_x, up_y)
+        ctx.down = (down_x, down_y)
+        ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1)
+        # Padding used by the gradient pass (UpFirDn2dBackward.forward).
+        g_pad_x0 = kernel_w - pad_x0 - 1
+        g_pad_y0 = kernel_h - pad_y0 - 1
+        g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1
+        g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1
+        ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1)
+        out = upfirdn2d_ext.upfirdn2d(
+            input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
+        )
+        # out = out.view(major, out_h, out_w, minor)
+        out = out.view(-1, channel, out_h, out_w)
+        return out
+    @staticmethod
+    def backward(ctx, grad_output):
+        """Delegate the input gradient to ``UpFirDn2dBackward``.
+        Returns:
+            tuple: The gradient for ``input`` followed by ``None`` for
+                ``kernel``, ``up``, ``down`` and ``pad``.
+        """
+        kernel, grad_kernel = ctx.saved_tensors
+        grad_input = UpFirDn2dBackward.apply(
+            grad_output,
+            kernel,
+            grad_kernel,
+            ctx.up,
+            ctx.down,
+            ctx.pad,
+            ctx.g_pad,
+            ctx.in_size,
+            ctx.out_size,
+        )
+        return grad_input, None, None, None, None
+def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
+    """Upsample, FIR-filter and downsample ``input`` with the same settings on both axes.
+    Args:
+        input (Tensor): 4-D tensor; both code paths unpack it as (_, channel, in_h, in_w).
+        kernel (Tensor): 2-D FIR kernel.
+        up (int): Upsampling factor applied to x and y. Default: 1.
+        down (int): Downsampling factor applied to x and y. Default: 1.
+        pad (tuple): ``(pad0, pad1)`` applied to x and y. Default: (0, 0).
+    Returns:
+        Tensor: The resampled tensor.
+    """
+    # The compiled extension is optional: this module sets ``upfirdn2d_ext`` to
+    # None, and UpFirDn2d would then fail with AttributeError on any non-CPU
+    # tensor. Use the pure-PyTorch implementation whenever the extension is
+    # missing, not only on CPU.
+    if upfirdn2d_ext is None or input.device.type == "cpu":
+        return upfirdn2d_native(
+            input, kernel, up, up, down, down, pad[0], pad[1], pad[0], pad[1]
+        )
+    return UpFirDn2d.apply(
+        input, kernel, (up, up), (down, down), (pad[0], pad[1], pad[0], pad[1])
+    )
+def upfirdn2d_native(
+    input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
+):
+    """Pure-PyTorch upsample / FIR filter / downsample of a 4-D tensor.
+    Args:
+        input (Tensor): 4-D tensor, unpacked as (_, channel, in_h, in_w).
+        kernel (Tensor): 2-D FIR kernel of shape (kernel_h, kernel_w).
+        up_x, up_y (int): Zero-insertion upsampling factors.
+        down_x, down_y (int): Downsampling strides.
+        pad_x0, pad_x1, pad_y0, pad_y1 (int): Padding before/after each axis;
+            negative values crop instead of pad.
+    Returns:
+        Tensor: Result viewed as (-1, channel, out_h, out_w).
+    """
+    _, channel, in_h, in_w = input.shape
+    kernel_h, kernel_w = kernel.shape
+    # Fold batch and channel into the leading axis; keep a trailing singleton axis.
+    planes = input.reshape(-1, in_h, in_w, 1)
+    minor = planes.shape[3]
+    # Spatial size after zero insertion and (possibly negative) padding.
+    padded_h = in_h * up_y + pad_y0 + pad_y1
+    padded_w = in_w * up_x + pad_x0 + pad_x1
+    # Upsample: append (up - 1) zeros after every sample along each axis.
+    grid = planes.view(-1, in_h, 1, in_w, 1, minor)
+    grid = F.pad(grid, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
+    grid = grid.view(-1, in_h * up_y, in_w * up_x, minor)
+    # Positive padding adds zeros; negative padding is applied as a crop.
+    grid = F.pad(
+        grid, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]
+    )
+    rows = slice(max(-pad_y0, 0), grid.shape[1] - max(-pad_y1, 0))
+    cols = slice(max(-pad_x0, 0), grid.shape[2] - max(-pad_x1, 0))
+    grid = grid[:, rows, cols, :]
+    grid = grid.permute(0, 3, 1, 2).reshape([-1, 1, padded_h, padded_w])
+    # Filter with the kernel flipped along both axes.
+    flipped = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
+    filtered = F.conv2d(grid, flipped)
+    filtered = filtered.reshape(
+        -1, minor, padded_h - kernel_h + 1, padded_w - kernel_w + 1
+    )
+    # Downsample by keeping every down-th sample.
+    filtered = filtered.permute(0, 2, 3, 1)[:, ::down_y, ::down_x, :]
+    out_h = (padded_h - kernel_h) // down_y + 1
+    out_w = (padded_w - kernel_w) // down_x + 1
+    return filtered.view(-1, channel, out_h, out_w)

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/timm/LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2019 Ross Wightman
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/timm/drop.py ADDED Viewed

	@@ -0,0 +1,223 @@

+""" DropBlock, DropPath
+PyTorch implementations of DropBlock and DropPath (Stochastic Depth) regularization layers.
+Papers:
+DropBlock: A regularization method for convolutional networks (https://arxiv.org/abs/1810.12890)
+Deep Networks with Stochastic Depth (https://arxiv.org/abs/1603.09382)
+Code:
+DropBlock impl inspired by two Tensorflow impl that I liked:
+ - https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_model.py#L74
+ - https://github.com/clovaai/assembled-cnn/blob/master/nets/blocks.py
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+def drop_block_2d(
+    x,
+    drop_prob: float = 0.1,
+    block_size: int = 7,
+    gamma_scale: float = 1.0,
+    with_noise: bool = False,
+    inplace: bool = False,
+    batchwise: bool = False,
+):
+    """DropBlock. See https://arxiv.org/pdf/1810.12890.pdf
+    DropBlock with an experimental gaussian noise option. This layer has been tested on a few training
+    runs with success, but needs further validation and possibly optimization for lower runtime impact.
+    Args:
+        x (Tensor): 4-D input, unpacked as (_, C, H, W).
+        drop_prob: Target drop probability used to derive the seed rate gamma.
+        block_size: Side of the dropped square; clipped to min(W, H).
+        gamma_scale: Extra multiplier applied to gamma.
+        with_noise: Fill dropped positions with standard normal noise instead of rescaling the kept ones.
+        inplace: Modify ``x`` in place instead of allocating a new tensor.
+        batchwise: Draw one (1, C, H, W) mask and share it across the batch.
+    Returns:
+        Tensor: ``x`` with blocks dropped (the same object as the input when ``inplace`` is True).
+    """
+    _, C, H, W = x.shape
+    total_size = W * H
+    clipped_block_size = min(block_size, min(W, H))
+    # seed_drop_rate, the gamma parameter. The number of valid seed positions
+    # is computed from the clipped size; using the raw block_size gave a zero
+    # or negative divisor whenever block_size exceeded the feature map.
+    gamma = (
+        gamma_scale
+        * drop_prob
+        * total_size
+        / clipped_block_size**2
+        / ((W - clipped_block_size + 1) * (H - clipped_block_size + 1))
+    )
+    # Forces the block to be inside the feature map. The mask is built directly
+    # in (H, W) layout by broadcasting a row test against a column test; the
+    # previous meshgrid produced a (W, H) grid that was then reshaped to
+    # (H, W), which scrambled the mask for non-square inputs.
+    lo = clipped_block_size // 2
+    hi = (clipped_block_size - 1) // 2
+    h_i = torch.arange(H, device=x.device)
+    w_i = torch.arange(W, device=x.device)
+    valid_h = (h_i >= lo) & (h_i < H - hi)
+    valid_w = (w_i >= lo) & (w_i < W - hi)
+    valid_block = (valid_h[:, None] & valid_w[None, :]).reshape(1, 1, H, W)
+    valid_block = valid_block.to(dtype=x.dtype)
+    if batchwise:
+        # one mask for whole batch, quite a bit faster
+        uniform_noise = torch.rand((1, C, H, W), dtype=x.dtype, device=x.device)
+    else:
+        uniform_noise = torch.rand_like(x)
+    # 0 marks a block seed: only possible at valid positions, with probability gamma.
+    block_mask = ((2 - gamma - valid_block + uniform_noise) >= 1).to(dtype=x.dtype)
+    # Min-pooling (negated max-pool) grows every seed into a full block.
+    block_mask = -F.max_pool2d(
+        -block_mask,
+        kernel_size=clipped_block_size,  # block_size,
+        stride=1,
+        padding=clipped_block_size // 2,
+    )
+    # An even kernel with padding k // 2 yields (H + 1, W + 1); crop back so
+    # the mask matches x. This is a no-op for odd kernel sizes.
+    block_mask = block_mask[:, :, :H, :W]
+    if with_noise:
+        normal_noise = (
+            torch.randn((1, C, H, W), dtype=x.dtype, device=x.device)
+            if batchwise
+            else torch.randn_like(x)
+        )
+        if inplace:
+            x.mul_(block_mask).add_(normal_noise * (1 - block_mask))
+        else:
+            x = x * block_mask + normal_noise * (1 - block_mask)
+    else:
+        # Rescale kept activations by (total / kept); the epsilon avoids division by zero.
+        normalize_scale = (
+            block_mask.numel() / block_mask.to(dtype=torch.float32).sum().add(1e-7)
+        ).to(x.dtype)
+        if inplace:
+            x.mul_(block_mask * normalize_scale)
+        else:
+            x = x * block_mask * normalize_scale
+    return x
+def drop_block_fast_2d(
+    x: torch.Tensor,
+    drop_prob: float = 0.1,
+    block_size: int = 7,
+    gamma_scale: float = 1.0,
+    with_noise: bool = False,
+    inplace: bool = False,
+):
+    """DropBlock. See https://arxiv.org/pdf/1810.12890.pdf
+    DropBlock with an experimental gaussian noise option. Simplified from drop_block_2d without concern
+    for valid block mask at edges.
+    Args:
+        x: 4-D input, unpacked as (_, _, H, W).
+        drop_prob: Target drop probability used to derive the seed rate gamma.
+        block_size: Side of the dropped square; clipped to min(W, H).
+        gamma_scale: Extra multiplier applied to gamma.
+        with_noise: Fill dropped positions with standard normal noise instead of rescaling the kept ones.
+        inplace: Modify ``x`` in place instead of allocating a new tensor.
+    Returns:
+        Tensor: ``x`` with blocks dropped (the same object as the input when ``inplace`` is True).
+    """
+    _, _, H, W = x.shape
+    total_size = W * H
+    clipped_block_size = min(block_size, min(W, H))
+    # The number of valid seed positions is computed from the clipped size;
+    # using the raw block_size gave a zero or negative divisor whenever
+    # block_size exceeded the feature map.
+    gamma = (
+        gamma_scale
+        * drop_prob
+        * total_size
+        / clipped_block_size**2
+        / ((W - clipped_block_size + 1) * (H - clipped_block_size + 1))
+    )
+    # 1 marks a block seed; max-pooling grows every seed into a full block.
+    block_mask = torch.empty_like(x).bernoulli_(gamma)
+    block_mask = F.max_pool2d(
+        block_mask.to(x.dtype),
+        kernel_size=clipped_block_size,
+        stride=1,
+        padding=clipped_block_size // 2,
+    )
+    # An even kernel with padding k // 2 yields (H + 1, W + 1); crop back so
+    # the mask matches x. This is a no-op for odd kernel sizes.
+    block_mask = block_mask[:, :, :H, :W]
+    if with_noise:
+        normal_noise = torch.empty_like(x).normal_()
+        if inplace:
+            x.mul_(1.0 - block_mask).add_(normal_noise * block_mask)
+        else:
+            x = x * (1.0 - block_mask) + normal_noise * block_mask
+    else:
+        # Switch to a keep-mask and rescale kept activations by (total / kept).
+        block_mask = 1 - block_mask
+        normalize_scale = (
+            block_mask.numel() / block_mask.to(dtype=torch.float32).sum().add(1e-6)
+        ).to(dtype=x.dtype)
+        if inplace:
+            x.mul_(block_mask * normalize_scale)
+        else:
+            x = x * block_mask * normalize_scale
+    return x
+class DropBlock2d(nn.Module):
+    """Module wrapper around the functional DropBlock implementations.
+    See https://arxiv.org/pdf/1810.12890.pdf
+    Args:
+        drop_prob: Drop probability; a falsy value disables the layer.
+        block_size: Side of the dropped square.
+        gamma_scale: Multiplier forwarded to the functional implementation.
+        with_noise: Forwarded; selects the gaussian-noise variant.
+        inplace: Forwarded; modify the input in place.
+        batchwise: Forwarded to ``drop_block_2d`` only (ignored when ``fast``).
+        fast: Use ``drop_block_fast_2d`` instead of ``drop_block_2d``.
+    """
+    def __init__(
+        self,
+        drop_prob: float = 0.1,
+        block_size: int = 7,
+        gamma_scale: float = 1.0,
+        with_noise: bool = False,
+        inplace: bool = False,
+        batchwise: bool = False,
+        fast: bool = True,
+    ):
+        super().__init__()
+        self.drop_prob = drop_prob
+        self.block_size = block_size
+        self.gamma_scale = gamma_scale
+        self.with_noise = with_noise
+        self.inplace = inplace
+        self.batchwise = batchwise
+        self.fast = fast  # FIXME finish comparisons of fast vs not
+    def forward(self, x):
+        """Apply DropBlock in training mode; return ``x`` untouched otherwise."""
+        enabled = self.training and self.drop_prob
+        if not enabled:
+            return x
+        # Arguments shared by both functional variants.
+        common = (
+            x,
+            self.drop_prob,
+            self.block_size,
+            self.gamma_scale,
+            self.with_noise,
+            self.inplace,
+        )
+        if self.fast:
+            return drop_block_fast_2d(*common)
+        return drop_block_2d(*common, self.batchwise)
+def drop_path(
+    x, drop_prob: float = 0.0, training: bool = False, scale_by_keep: bool = True
+):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+    One Bernoulli keep/drop decision is drawn per element of the first
+    dimension and broadcast over all remaining dimensions.
+    Args:
+        x (Tensor): Input; the first dimension indexes the samples.
+        drop_prob: Probability of zeroing a sample.
+        training: When False the input is returned unchanged.
+        scale_by_keep: Divide kept samples by the keep probability.
+    Returns:
+        Tensor: ``x`` itself when nothing is dropped, otherwise the masked product.
+    """
+    if drop_prob == 0.0 or not training:
+        return x
+    survival = 1 - drop_prob
+    # (batch, 1, 1, ...) so the mask works for tensors of any rank.
+    trailing_ones = (1,) * (x.ndim - 1)
+    mask_shape = (x.shape[0],) + trailing_ones
+    keep_mask = x.new_empty(mask_shape).bernoulli_(survival)
+    if scale_by_keep and survival > 0.0:
+        keep_mask.div_(survival)
+    return x * keep_mask
+class DropPath(nn.Module):
+    """Module form of ``drop_path``: per-sample Stochastic Depth, active only in training mode.
+    Args:
+        drop_prob: Probability of dropping a sample's path.
+        scale_by_keep: Forwarded to ``drop_path``.
+    """
+    def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True):
+        super().__init__()
+        self.drop_prob = drop_prob
+        self.scale_by_keep = scale_by_keep
+    def forward(self, x):
+        """Apply ``drop_path`` using the module's training flag."""
+        return drop_path(
+            x,
+            drop_prob=self.drop_prob,
+            training=self.training,
+            scale_by_keep=self.scale_by_keep,
+        )
+    def extra_repr(self):
+        """Return the drop probability, shown to three decimals in the module repr."""
+        rounded = round(self.drop_prob, 3)
+        return f"drop_prob={rounded:0.3f}"

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/timm/helpers.py ADDED Viewed

	@@ -0,0 +1,31 @@

+""" Layer/Module Helpers
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+import collections.abc
+from itertools import repeat
+# From PyTorch internals
+def _ntuple(n):
+    """Build a converter that expands a scalar into a tuple of length ``n``.
+    The returned function hands back any non-string iterable unchanged and
+    repeats every other value (strings included) ``n`` times.
+    """
+    def parse(x):
+        is_collection = isinstance(x, collections.abc.Iterable) and not isinstance(
+            x, str
+        )
+        return x if is_collection else tuple(repeat(x, n))
+    return parse
+# Ready-made converters for the common lengths: each expands a scalar into a
+# tuple of that length and returns non-string iterables unchanged.
+to_1tuple = _ntuple(1)
+to_2tuple = _ntuple(2)
+to_3tuple = _ntuple(3)
+to_4tuple = _ntuple(4)
+# Alias of the factory itself, for callers that need an arbitrary length n.
+to_ntuple = _ntuple
+def make_divisible(v, divisor=8, min_value=None, round_limit=0.9):
+    """Round ``v`` to a multiple of ``divisor``, never going below ``min_value``.
+    Args:
+        v: Value to round.
+        divisor: The result is a multiple of this (unless ``min_value`` wins).
+        min_value: Lower bound; a falsy value means "use ``divisor``".
+        round_limit: If rounding ends below ``round_limit * v``, one more
+            ``divisor`` is added.
+    Returns:
+        The rounded value.
+    """
+    lower_bound = min_value if min_value else divisor
+    nearest_multiple = int(v + divisor / 2) // divisor * divisor
+    result = max(lower_bound, nearest_multiple)
+    # Make sure that round down does not go down by more than 10%.
+    if result < round_limit * v:
+        result += divisor
+    return result

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/architecture/timm/weight_init.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import math
+import warnings
+import torch
+from torch.nn.init import _calculate_fan_in_and_fan_out
+def _no_grad_trunc_normal_(tensor, mean, std, a, b):
+    """Fill ``tensor`` in place with truncated-normal samples and return it.
+    Adapted from the PyTorch implementation (kept here until it is available in
+    official releases - RW). Method based on
+    https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
+    Args:
+        tensor: tensor to overwrite.
+        mean: mean of the underlying normal distribution.
+        std: standard deviation of the underlying normal distribution.
+        a: lower cutoff; also the lower clamp bound.
+        b: upper cutoff; also the upper clamp bound.
+    Returns:
+        ``tensor`` itself, after the in-place fill.
+    """
+    root_two = math.sqrt(2.0)
+    def std_normal_cdf(z):
+        # Cumulative distribution function of the standard normal.
+        return (1.0 + math.erf(z / root_two)) / 2.0
+    mean_far_outside = (mean < a - 2 * std) or (mean > b + 2 * std)
+    if mean_far_outside:
+        warnings.warn(
+            "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
+            "The distribution of values may be incorrect.",
+            stacklevel=2,
+        )
+    # CDF values of the two cutoffs; samples are drawn uniformly between them.
+    cdf_low = std_normal_cdf((a - mean) / std)
+    cdf_high = std_normal_cdf((b - mean) / std)
+    with torch.no_grad():
+        # Uniform on [2l-1, 2u-1], the input range erfinv needs for the
+        # inverse-CDF transform.
+        tensor.uniform_(2 * cdf_low - 1, 2 * cdf_high - 1)
+        # Inverse CDF of the normal distribution -> truncated standard normal.
+        tensor.erfinv_()
+        # Rescale and shift to the requested std and mean.
+        tensor.mul_(std * root_two)
+        tensor.add_(mean)
+        # Clamp so the result stays inside [a, b].
+        tensor.clamp_(min=a, max=b)
+    return tensor
+def trunc_normal_(
+    tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0
+) -> torch.Tensor:
+    r"""Fill the input tensor in place with values from a truncated normal distribution.
+    The values are effectively drawn from the normal distribution
+    :math:`\mathcal{N}(\text{mean}, \text{std}^2)` restricted to :math:`[a, b]`.
+    All work is delegated to ``_no_grad_trunc_normal_``. The method used for
+    generating the random values works best when
+    :math:`a \leq \text{mean} \leq b`.
+    NOTE: this impl is similar to the PyTorch trunc_normal_, the bounds [a, b] are
+    applied while sampling the normal with mean/std applied, therefore a, b args
+    should be adjusted to match the range of mean, std args.
+    Args:
+        tensor: an n-dimensional `torch.Tensor`
+        mean: the mean of the normal distribution
+        std: the standard deviation of the normal distribution
+        a: the minimum cutoff value
+        b: the maximum cutoff value
+    Returns:
+        Whatever ``_no_grad_trunc_normal_`` returns for the same arguments.
+    Examples:
+        >>> w = torch.empty(3, 5)
+        >>> trunc_normal_(w)
+    """
+    return _no_grad_trunc_normal_(tensor, mean, std, a, b)
+def trunc_normal_tf_(
+    tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0
+) -> torch.Tensor:
+    r"""Fill the input tensor in place with truncated-normal values, TF/JAX style.
+    NOTE: this 'tf' variant behaves closer to the Tensorflow / JAX impl: the
+    bounds [a, b] are applied when sampling the normal distribution with
+    mean=0, std=1.0, and the result is subsequently scaled by ``std`` and
+    shifted by ``mean``. The cutoffs are therefore expressed in units of the
+    standard normal, not of the final distribution.
+    Args:
+        tensor: an n-dimensional `torch.Tensor`
+        mean: shift added after sampling
+        std: scale applied after sampling
+        a: the minimum cutoff value, applied before scaling and shifting
+        b: the maximum cutoff value, applied before scaling and shifting
+    Returns:
+        ``tensor`` itself, after the in-place fill.
+    Examples:
+        >>> w = torch.empty(3, 5)
+        >>> trunc_normal_tf_(w)
+    """
+    with torch.no_grad():
+        # Sample a standard normal truncated to [a, b] first ...
+        _no_grad_trunc_normal_(tensor, 0, 1.0, a, b)
+        # ... then move it to the requested scale and location.
+        tensor.mul_(std)
+        tensor.add_(mean)
+    return tensor
+def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"):
+    """Initialise ``tensor`` in place so that its variance is ``scale / fan``.
+    Args:
+        tensor: tensor to fill; its fans are taken from
+            ``torch.nn.init._calculate_fan_in_and_fan_out``.
+        scale: numerator of the target variance.
+        mode: which fan divides ``scale``: ``"fan_in"``, ``"fan_out"`` or
+            ``"fan_avg"`` (the mean of the two).
+        distribution: ``"truncated_normal"``, ``"normal"`` or ``"uniform"``.
+    Raises:
+        ValueError: if ``mode`` or ``distribution`` is not one of the values above.
+    """
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
+    if mode == "fan_in":
+        denom = fan_in
+    elif mode == "fan_out":
+        denom = fan_out
+    elif mode == "fan_avg":
+        denom = (fan_in + fan_out) / 2
+    else:
+        # Reject unknown modes explicitly rather than failing on an unbound denom.
+        raise ValueError(f"invalid mode {mode}")
+    variance = scale / denom
+    if distribution == "truncated_normal":
+        # constant is stddev of standard normal truncated to (-2, 2)
+        trunc_normal_tf_(tensor, std=math.sqrt(variance) / 0.87962566103423978)
+    elif distribution == "normal":
+        # In-place ops on a leaf tensor that requires grad (e.g. nn.Parameter)
+        # raise unless autograd is disabled.
+        with torch.no_grad():
+            tensor.normal_(std=math.sqrt(variance))
+    elif distribution == "uniform":
+        # A uniform distribution on [-bound, bound] has variance bound**2 / 3.
+        bound = math.sqrt(3 * variance)
+        with torch.no_grad():
+            # pylint: disable=invalid-unary-operand-type
+            tensor.uniform_(-bound, bound)
+    else:
+        raise ValueError(f"invalid distribution {distribution}")
+def lecun_normal_(tensor):
+    """Initialise ``tensor`` in place via ``variance_scaling_``.
+    Uses ``mode="fan_in"`` and ``distribution="truncated_normal"`` with the
+    default ``scale``. Returns ``None``.
+    """
+    variance_scaling_(tensor, mode="fan_in", distribution="truncated_normal")

LayerDiffuse-gradio-unofficial/ComfyUI/comfy_extras/chainner_models/model_loading.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import logging as logger
+from .architecture.DAT import DAT
+from .architecture.face.codeformer import CodeFormer
+from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean
+from .architecture.face.restoreformer_arch import RestoreFormer
+from .architecture.HAT import HAT
+from .architecture.LaMa import LaMa
+from .architecture.OmniSR.OmniSR import OmniSR
+from .architecture.RRDB import RRDBNet as ESRGAN
+from .architecture.SCUNet import SCUNet
+from .architecture.SPSR import SPSRNet as SPSR
+from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2
+from .architecture.SwiftSRGAN import Generator as SwiftSRGAN
+from .architecture.Swin2SR import Swin2SR
+from .architecture.SwinIR import SwinIR
+from .types import PyTorchModel
+class UnsupportedModel(Exception):
+    """Raised by ``load_state_dict`` when no known architecture fits the weights."""
+def load_state_dict(state_dict) -> PyTorchModel:
+    """Build the model whose architecture matches the keys of ``state_dict``.
+    If the weights are nested under ``"params_ema"``, ``"params-ema"`` or
+    ``"params"`` (checked in that order), that inner dict is used. The
+    architecture is then chosen by looking for characteristic parameter names.
+    The order of the checks matters: HAT, Swin2SR and SwinIR share a marker key
+    and are told apart by additional keys. ESRGAN is the fallback when nothing
+    else matches.
+    Args:
+        state_dict: mapping of parameter names to values, optionally wrapped as
+            described above.
+    Returns:
+        The model instance constructed from the (unwrapped) state dict.
+    Raises:
+        UnsupportedModel: if no specific architecture matches and the ESRGAN
+            fallback fails to build; the original error is chained as the cause.
+    """
+    logger.debug("Loading state dict into pytorch model arch")
+    # Unwrap nested checkpoints, preferring EMA weights over plain "params".
+    outer_keys = set(state_dict.keys())
+    for wrapper_key in ("params_ema", "params-ema", "params"):
+        if wrapper_key in outer_keys:
+            state_dict = state_dict[wrapper_key]
+            break
+    # A set keeps the many membership tests below constant-time.
+    keys = set(state_dict.keys())
+    # SRVGGNet Real-ESRGAN (v2)
+    if "body.0.weight" in keys and "body.1.weight" in keys:
+        model = RealESRGANv2(state_dict)
+    # SPSR (ESRGAN with lots of extra layers)
+    elif "f_HR_conv1.0.weight" in keys:
+        model = SPSR(state_dict)
+    # Swift-SRGAN
+    elif (
+        "model" in keys
+        and "initial.cnn.depthwise.weight" in state_dict["model"].keys()
+    ):
+        model = SwiftSRGAN(state_dict)
+    # SwinIR, Swin2SR, HAT
+    elif "layers.0.residual_group.blocks.0.norm1.weight" in keys:
+        if "layers.0.residual_group.blocks.0.conv_block.cab.0.weight" in keys:
+            model = HAT(state_dict)
+        elif "patch_embed.proj.weight" in keys:
+            model = Swin2SR(state_dict)
+        else:
+            model = SwinIR(state_dict)
+    # GFPGAN
+    elif (
+        "toRGB.0.weight" in keys
+        and "stylegan_decoder.style_mlp.1.weight" in keys
+    ):
+        model = GFPGANv1Clean(state_dict)
+    # RestoreFormer
+    elif (
+        "encoder.conv_in.weight" in keys
+        and "encoder.down.0.block.0.norm1.weight" in keys
+    ):
+        model = RestoreFormer(state_dict)
+    # CodeFormer
+    elif (
+        "encoder.blocks.0.weight" in keys
+        and "quantize.embedding.weight" in keys
+    ):
+        model = CodeFormer(state_dict)
+    # LaMa
+    elif (
+        "model.model.1.bn_l.running_mean" in keys
+        or "generator.model.1.bn_l.running_mean" in keys
+    ):
+        model = LaMa(state_dict)
+    # Omni-SR
+    elif "residual_layer.0.residual_layer.0.layer.0.fn.0.weight" in keys:
+        model = OmniSR(state_dict)
+    # SCUNet
+    elif "m_head.0.weight" in keys and "m_tail.0.weight" in keys:
+        model = SCUNet(state_dict)
+    # DAT
+    elif "layers.0.blocks.2.attn.attn_mask_0" in keys:
+        model = DAT(state_dict)
+    # Regular ESRGAN, "new-arch" ESRGAN, Real-ESRGAN v1
+    else:
+        try:
+            model = ESRGAN(state_dict)
+        except Exception as err:
+            # Catch Exception, not everything, so KeyboardInterrupt/SystemExit
+            # propagate; chain the cause so the real failure stays visible.
+            raise UnsupportedModel(
+                "state dict does not match any supported model architecture"
+            ) from err
+    return model