Spaces:

Egrt
/

GCycleGAN

Build error

App Files Files Community

Egrt committed on Oct 31, 2022

Commit

95e767b

1 Parent(s): 2669933

init

Browse files

Files changed (21) hide show

.gitignore +141 -0
app.py +38 -0
cyclegan.py +116 -0
img/7134850@N05_identity_2@7720949260_0.jpg +0 -0
img/7134850@N05_identity_2@7720963358_0.jpg +0 -0
img/7134850@N05_identity_2@8978938957_3.jpg +0 -0
img/7134850@N05_identity_2@8980174892_1.jpg +0 -0
img/7154980@N03_identity_0@2379147786_0.jpg +0 -0
img/epoch_14_results.png +0 -0
nets/__init__.py +0 -0
nets/cyclegan.py +923 -0
nets/resnest/__init__.py +2 -0
nets/resnest/ablation.py +106 -0
nets/resnest/resnest.py +60 -0
nets/resnest/resnet.py +310 -0
nets/resnest/splat.py +99 -0
utils/__init__.py +0 -0
utils/callbacks.py +65 -0
utils/dataloader.py +45 -0
utils/utils.py +136 -0
utils/utils_fit.py +249 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,141 @@

+# ignore map, miou, datasets
+map_out/
+miou_out/
+VOCdevkit/
+datasets/
+Medical_Datasets/
+lfw/
+logs/
+model_data/
+.temp_map_out/
+results/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/

app.py ADDED Viewed

	@@ -0,0 +1,38 @@

+'''
+Author: Egrt
+Date: 2022-01-13 13:34:10
+LastEditors: Egrt
+LastEditTime: 2022-10-17 10:23:29
+FilePath: \MaskGAN\app.py
+'''
+from cyclegan import CYCLEGAN
+import gradio as gr
+import os
+cyclegan = CYCLEGAN()
+# --------模型推理---------- #
+'''
+description:
+param {*} img 戴眼镜的人脸图片 Image
+return {*} r_image 去遮挡的人脸图片 Image
+'''
+def inference(img):
+    r_image = cyclegan.detect_image(img)
+    return r_image
+# --------网页信息---------- #
+title = "融合无监督的戴眼镜遮挡人脸重建"
+description = "使用生成对抗网络对戴眼镜遮挡人脸重建,能够有效地去除眼镜遮挡。  @西南科技大学智能控制与图像处理研究室"
+article = "<p style='text-align: center'>DeMaskGAN: Face Restoration Using Swin Transformer </p>"
+example_img_dir  = 'img'
+example_img_name = os.listdir(example_img_dir)
+examples=[os.path.join(example_img_dir, image_path) for image_path in example_img_name if image_path.endswith(('.jpg','.jpeg'))]
+gr.Interface(
+    inference,
+    gr.inputs.Image(type="pil", label="Input"),
+    gr.outputs.Image(type="pil", label="Output"),
+    title=title,
+    description=description,
+    article=article,
+    examples=examples
+    ).launch()

cyclegan.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import cv2
+import numpy as np
+import torch
+from PIL import Image
+from torch import nn
+from nets.cyclegan import Generator
+from utils.utils import (cvtColor, postprocess_output, preprocess_input,
+                         resize_image, show_config)
+class CYCLEGAN(object):
+    _defaults = {
+        #-----------------------------------------------#
+        #   model_path指向logs文件夹下的权值文件
+        #-----------------------------------------------#
+        "model_path"        : 'model_data/G_model_B2A_last_epoch_weights.pth',
+        #-----------------------------------------------#
+        #   输入图像大小的设置
+        #-----------------------------------------------#
+        "input_shape"       : [112, 112],
+        #-------------------------------#
+        #   是否进行不失真的resize
+        #-------------------------------#
+        "letterbox_image"   : True,
+        #-------------------------------#
+        #   是否使用Cuda
+        #   没有GPU可以设置成False
+        #-------------------------------#
+        "cuda"              : True,
+    }
+    #---------------------------------------------------#
+    #   初始化CYCLEGAN
+    #---------------------------------------------------#
+    def __init__(self, **kwargs):
+        self.__dict__.update(self._defaults)
+        for name, value in kwargs.items():
+            setattr(self, name, value)
+            self._defaults[name] = value
+        self.generate()
+        show_config(**self._defaults)
+    def generate(self):
+        #----------------------------------------#
+        #   创建GAN模型
+        #----------------------------------------#
+        self.net    = Generator(upscale=1, img_size=tuple(self.input_shape),
+                   window_size=7, img_range=1., depths=[3, 3, 3, 3],
+                   embed_dim=60, num_heads=[3, 3, 3, 3], mlp_ratio=1, upsampler='1conv').eval()
+        device      = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.net.load_state_dict(torch.load(self.model_path, map_location=device))
+        self.net    = self.net.eval()
+        print('{} model loaded.'.format(self.model_path))
+        if self.cuda:
+            self.net = nn.DataParallel(self.net)
+            self.net = self.net.cuda()
+    #---------------------------------------------------#
+    #   生成1x1的图片
+    #---------------------------------------------------#
+    def detect_image(self, image):
+        #---------------------------------------------------------#
+        #   在这里将图像转换成RGB图像，防止灰度图在预测时报错。
+        #   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
+        #---------------------------------------------------------#
+        image       = cvtColor(image)
+        #---------------------------------------------------#
+        #   获得高宽
+        #---------------------------------------------------#
+        orininal_h  = np.array(image).shape[0]
+        orininal_w  = np.array(image).shape[1]
+        #---------------------------------------------------------#
+        #   给图像增加灰条，实现不失真的resize
+        #   也可以直接resize进行识别
+        #---------------------------------------------------------#
+        image_data, nw, nh = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image)
+        #---------------------------------------------------------#
+        #   添加上batch_size维度
+        #---------------------------------------------------------#
+        image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
+        with torch.no_grad():
+            images = torch.from_numpy(image_data)
+            if self.cuda:
+                images = images.cuda()
+            #---------------------------------------------------#
+            #   图片传入网络进行预测
+            #---------------------------------------------------#
+            pr = self.net(images)[0]
+            #---------------------------------------------------#
+            #   转为numpy
+            #---------------------------------------------------#
+            pr = pr.permute(1, 2, 0).cpu().numpy()
+            #--------------------------------------#
+            #   将灰条部分截取掉
+            #--------------------------------------#
+            if nw is not None:
+                pr = pr[int((self.input_shape[0] - nh) // 2) : int((self.input_shape[0] - nh) // 2 + nh), \
+                        int((self.input_shape[1] - nw) // 2) : int((self.input_shape[1] - nw) // 2 + nw)]
+            #---------------------------------------------------#
+            #   进行图片的resize
+            #---------------------------------------------------#
+            pr = cv2.resize(pr, (orininal_w, orininal_h), interpolation = cv2.INTER_LINEAR)
+        image = postprocess_output(pr)
+        image = np.clip(image, 0, 255)
+        image = Image.fromarray(np.uint8(image))
+        return image

img/7134850@N05_identity_2@7720949260_0.jpg ADDED Viewed

img/7134850@N05_identity_2@7720963358_0.jpg ADDED Viewed

img/7134850@N05_identity_2@8978938957_3.jpg ADDED Viewed

img/7134850@N05_identity_2@8980174892_1.jpg ADDED Viewed

img/7154980@N03_identity_0@2379147786_0.jpg ADDED Viewed

img/epoch_14_results.png ADDED Viewed

nets/__init__.py ADDED Viewed

File without changes

nets/cyclegan.py ADDED Viewed

	@@ -0,0 +1,923 @@

+# -----------------------------------------------------------------------------------
+# SwinIR: Image Restoration Using Swin Transformer, https://arxiv.org/abs/2108.10257
+# Originally Written by Ze Liu, Modified by Jingyun Liang.
+# -----------------------------------------------------------------------------------
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+from timm.models.layers import DropPath, to_2tuple, trunc_normal_
+class Mlp(nn.Module):
+    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+def window_partition(x, window_size):
+    """
+    Args:
+        x: (B, H, W, C)
+        window_size (int): window size
+    Returns:
+        windows: (num_windows*B, window_size, window_size, C)
+    """
+    B, H, W, C = x.shape
+    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
+    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
+    return windows
+def window_reverse(windows, window_size, H, W):
+    """
+    Args:
+        windows: (num_windows*B, window_size, window_size, C)
+        window_size (int): Window size
+        H (int): Height of image
+        W (int): Width of image
+    Returns:
+        x: (B, H, W, C)
+    """
+    B = int(windows.shape[0] / (H * W / window_size / window_size))
+    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
+    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
+    return x
+class WindowAttention(nn.Module):
+    r""" Window based multi-head self attention (W-MSA) module with relative position bias.
+    It supports both of shifted and non-shifted window.
+    Args:
+        dim (int): Number of input channels.
+        window_size (tuple[int]): The height and width of the window.
+        num_heads (int): Number of attention heads.
+        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
+        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
+        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
+    """
+    def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.):
+        super().__init__()
+        self.dim = dim
+        self.window_size = window_size  # Wh, Ww
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        # `or` means any falsy qk_scale (None or 0) selects the default head_dim ** -0.5.
+        self.scale = qk_scale or head_dim ** -0.5
+        # define a parameter table of relative position bias
+        self.relative_position_bias_table = nn.Parameter(
+            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads))  # 2*Wh-1 * 2*Ww-1, nH
+        # get pair-wise relative position index for each token inside the window
+        coords_h = torch.arange(self.window_size[0])
+        coords_w = torch.arange(self.window_size[1])
+        coords = torch.stack(torch.meshgrid([coords_h, coords_w]))  # 2, Wh, Ww
+        coords_flatten = torch.flatten(coords, 1)  # 2, Wh*Ww
+        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # 2, Wh*Ww, Wh*Ww
+        relative_coords = relative_coords.permute(1, 2, 0).contiguous()  # Wh*Ww, Wh*Ww, 2
+        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0
+        relative_coords[:, :, 1] += self.window_size[1] - 1
+        # Scale the row offset by the number of possible column offsets so that
+        # row + column below is a unique flat index into the bias table.
+        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
+        relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
+        # Registered as a buffer: stored with the module's state but not trained.
+        self.register_buffer("relative_position_index", relative_position_index)
+        # One linear layer produces query, key and value together (hence dim * 3).
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+        trunc_normal_(self.relative_position_bias_table, std=.02)
+        self.softmax = nn.Softmax(dim=-1)
+    def forward(self, x, mask=None):
+        """
+        Args:
+            x: input features with shape of (num_windows*B, N, C)
+            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
+        Returns:
+            tensor with the same shape as x, (num_windows*B, N, C)
+        """
+        B_, N, C = x.shape
+        # (B_, N, 3*C) -> (3, B_, num_heads, N, C // num_heads)
+        qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
+        q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)
+        q = q * self.scale
+        attn = (q @ k.transpose(-2, -1))
+        # Look up the learned bias of every (query, key) position pair inside the window.
+        relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
+            self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1)  # Wh*Ww,Wh*Ww,nH
+        relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()  # nH, Wh*Ww, Wh*Ww
+        attn = attn + relative_position_bias.unsqueeze(0)
+        if mask is not None:
+            nW = mask.shape[0]
+            # Separate the batch and window axes so the per-window mask broadcasts over batch and heads.
+            attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
+            attn = attn.view(-1, self.num_heads, N, N)
+            attn = self.softmax(attn)
+        else:
+            attn = self.softmax(attn)
+        attn = self.attn_drop(attn)
+        # Merge the heads back into the channel dimension.
+        x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+    def extra_repr(self) -> str:
+        return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}'
+    def flops(self, N):
+        """Return the multiply-accumulate estimate for one window holding N tokens."""
+        # calculate flops for 1 window with token length of N
+        flops = 0
+        # qkv = self.qkv(x)
+        flops += N * self.dim * 3 * self.dim
+        # attn = (q @ k.transpose(-2, -1))
+        flops += self.num_heads * N * (self.dim // self.num_heads) * N
+        #  x = (attn @ v)
+        flops += self.num_heads * N * N * (self.dim // self.num_heads)
+        # x = self.proj(x)
+        flops += N * self.dim * self.dim
+        return flops
+class SwinTransformerBlock(nn.Module):
+    r""" Swin Transformer Block.
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        num_heads (int): Number of attention heads.
+        window_size (int): Window size.
+        shift_size (int): Shift size for SW-MSA.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float, optional): Stochastic depth rate. Default: 0.0
+        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+    def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0,
+                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.,
+                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.num_heads = num_heads
+        self.window_size = window_size
+        self.shift_size = shift_size
+        self.mlp_ratio = mlp_ratio
+        if min(self.input_resolution) <= self.window_size:
+            # if window size is larger than input resolution, we don't partition windows
+            self.shift_size = 0
+            self.window_size = min(self.input_resolution)
+        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"
+        self.norm1 = norm_layer(dim)
+        self.attn = WindowAttention(
+            dim, window_size=to_2tuple(self.window_size), num_heads=num_heads,
+            qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
+        # Stochastic depth is only instantiated when it has an effect.
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+        # Pre-compute the mask for the construction-time resolution; blocks without
+        # a shift need no mask.
+        if self.shift_size > 0:
+            attn_mask = self.calculate_mask(self.input_resolution)
+        else:
+            attn_mask = None
+        self.register_buffer("attn_mask", attn_mask)
+    def calculate_mask(self, x_size):
+        """Build the additive attention mask for shifted windows.
+        Args:
+            x_size: (H, W) of the feature map.
+        Returns:
+            Tensor of shape (nW, window_size*window_size, window_size*window_size) holding
+            0.0 where two positions carry the same region label and -100.0 otherwise.
+        """
+        # calculate attention mask for SW-MSA
+        H, W = x_size
+        img_mask = torch.zeros((1, H, W, 1))  # 1 H W 1
+        # Three bands per axis; each of the 3 x 3 band combinations gets its own label below.
+        h_slices = (slice(0, -self.window_size),
+                    slice(-self.window_size, -self.shift_size),
+                    slice(-self.shift_size, None))
+        w_slices = (slice(0, -self.window_size),
+                    slice(-self.window_size, -self.shift_size),
+                    slice(-self.shift_size, None))
+        cnt = 0
+        for h in h_slices:
+            for w in w_slices:
+                img_mask[:, h, w, :] = cnt
+                cnt += 1
+        mask_windows = window_partition(img_mask, self.window_size)  # nW, window_size, window_size, 1
+        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
+        # Pairwise label difference: non-zero means the two positions lie in different regions.
+        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
+        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
+        return attn_mask
+    def forward(self, x, x_size):
+        """Apply windowed attention and the MLP, each wrapped in a residual connection.
+        Args:
+            x: tokens of shape (B, L, C); the view below requires L == H * W.
+            x_size: (H, W) of the feature map the tokens were flattened from.
+        Returns:
+            Tensor of shape (B, H * W, C).
+        """
+        H, W = x_size
+        B, L, C = x.shape
+        # assert L == H * W, "input feature has wrong size"
+        shortcut = x
+        x = self.norm1(x)
+        x = x.view(B, H, W, C)
+        # cyclic shift
+        if self.shift_size > 0:
+            shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
+        else:
+            shifted_x = x
+        # partition windows
+        x_windows = window_partition(shifted_x, self.window_size)  # nW*B, window_size, window_size, C
+        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)  # nW*B, window_size*window_size, C
+        # W-MSA/SW-MSA (to be compatible with testing on images whose shapes are a multiple of the window size)
+        # NOTE(review): this is a plain == comparison, and a list never equals a tuple, so
+        # if input_resolution and x_size differ in container type the cached mask is never
+        # used and a new one is built on every call — confirm callers pass matching types.
+        if self.input_resolution == x_size:
+            attn_windows = self.attn(x_windows, mask=self.attn_mask)  # nW*B, window_size*window_size, C
+        else:
+            attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size).to(x.device))
+        # merge windows
+        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
+        shifted_x = window_reverse(attn_windows, self.window_size, H, W)  # B H' W' C
+        # reverse cyclic shift
+        if self.shift_size > 0:
+            x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
+        else:
+            x = shifted_x
+        x = x.view(B, H * W, C)
+        # FFN
+        x = shortcut + self.drop_path(x)
+        x = x + self.drop_path(self.mlp(self.norm2(x)))
+        return x
+    def extra_repr(self) -> str:
+        return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \
+               f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}"
+    def flops(self):
+        """Return the operation-count estimate of this block at ``input_resolution``."""
+        flops = 0
+        H, W = self.input_resolution
+        # norm1
+        flops += self.dim * H * W
+        # W-MSA/SW-MSA
+        nW = H * W / self.window_size / self.window_size
+        flops += nW * self.attn.flops(self.window_size * self.window_size)
+        # mlp
+        flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio
+        # norm2
+        flops += self.dim * H * W
+        return flops
+class PatchMerging(nn.Module):
+    r""" Patch Merging Layer.
+    Args:
+        input_resolution (tuple[int]): Resolution of input feature.
+        dim (int): Number of input channels.
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+    def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.input_resolution = input_resolution
+        self.dim = dim
+        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
+        self.norm = norm_layer(4 * dim)
+    def forward(self, x):
+        """
+        x: B, H*W, C
+        """
+        H, W = self.input_resolution
+        B, L, C = x.shape
+        assert L == H * W, "input feature has wrong size"
+        assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even."
+        x = x.view(B, H, W, C)
+        x0 = x[:, 0::2, 0::2, :]  # B H/2 W/2 C
+        x1 = x[:, 1::2, 0::2, :]  # B H/2 W/2 C
+        x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
+        x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
+        x = torch.cat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
+        x = x.view(B, -1, 4 * C)  # B H/2*W/2 4*C
+        x = self.norm(x)
+        x = self.reduction(x)
+        return x
+    def extra_repr(self) -> str:
+        return f"input_resolution={self.input_resolution}, dim={self.dim}"
+    def flops(self):
+        H, W = self.input_resolution
+        flops = H * W * self.dim
+        flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
+        return flops
+class BasicLayer(nn.Module):
+    """ A basic Swin Transformer layer for one stage.
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        depth (int): Number of blocks.
+        num_heads (int): Number of attention heads.
+        window_size (int): Local window size.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+    """
+    def __init__(self, dim, input_resolution, depth, num_heads, window_size,
+                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
+                 drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.depth = depth
+        self.use_checkpoint = use_checkpoint
+        # build blocks
+        self.blocks = nn.ModuleList([
+            SwinTransformerBlock(dim=dim, input_resolution=input_resolution,
+                                 num_heads=num_heads, window_size=window_size,
+                                 shift_size=0 if (i % 2 == 0) else window_size // 2,
+                                 mlp_ratio=mlp_ratio,
+                                 qkv_bias=qkv_bias, qk_scale=qk_scale,
+                                 drop=drop, attn_drop=attn_drop,
+                                 drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
+                                 norm_layer=norm_layer)
+            for i in range(depth)])
+        # patch merging layer
+        if downsample is not None:
+            self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer)
+        else:
+            self.downsample = None
+    def forward(self, x, x_size):
+        for blk in self.blocks:
+            if self.use_checkpoint:
+                x = checkpoint.checkpoint(blk, x, x_size)
+            else:
+                x = blk(x, x_size)
+        if self.downsample is not None:
+            x = self.downsample(x)
+        return x
+    def extra_repr(self) -> str:
+        return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}"
+    def flops(self):
+        flops = 0
+        for blk in self.blocks:
+            flops += blk.flops()
+        if self.downsample is not None:
+            flops += self.downsample.flops()
+        return flops
+class RSTB(nn.Module):
+    """Residual Swin Transformer Block (RSTB).
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        depth (int): Number of blocks.
+        num_heads (int): Number of attention heads.
+        window_size (int): Local window size.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+        img_size: Input image size.
+        patch_size: Patch size.
+        resi_connection: The convolutional block before residual connection.
+    """
+    def __init__(self, dim, input_resolution, depth, num_heads, window_size,
+                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
+                 drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False,
+                 img_size=224, patch_size=4, resi_connection='1conv'):
+        super(RSTB, self).__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.residual_group = BasicLayer(dim=dim,
+                                         input_resolution=input_resolution,
+                                         depth=depth,
+                                         num_heads=num_heads,
+                                         window_size=window_size,
+                                         mlp_ratio=mlp_ratio,
+                                         qkv_bias=qkv_bias, qk_scale=qk_scale,
+                                         drop=drop, attn_drop=attn_drop,
+                                         drop_path=drop_path,
+                                         norm_layer=norm_layer,
+                                         downsample=downsample,
+                                         use_checkpoint=use_checkpoint)
+        if resi_connection == '1conv':
+            self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
+        elif resi_connection == '3conv':
+            # to save parameters and memory
+            self.conv = nn.Sequential(nn.Conv2d(dim, dim // 4, 3, 1, 1), nn.GELU(),
+                                      nn.Conv2d(dim // 4, dim // 4, 1, 1, 0),
+                                      nn.GELU(),
+                                      nn.Conv2d(dim // 4, dim, 3, 1, 1))
+        self.patch_embed = PatchEmbed(
+            img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim,
+            norm_layer=None)
+        self.patch_unembed = PatchUnEmbed(
+            img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim,
+            norm_layer=None)
+    def forward(self, x, x_size):
+        return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x
+    def flops(self):
+        flops = 0
+        flops += self.residual_group.flops()
+        H, W = self.input_resolution
+        flops += H * W * self.dim * self.dim * 9
+        flops += self.patch_embed.flops()
+        flops += self.patch_unembed.flops()
+        return flops
+class PatchEmbed(nn.Module):
+    r""" Image to Patch Embedding
+    Args:
+        img_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Module, optional): Normalization layer. Default: None
+    """
+    def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+        if norm_layer is not None:
+            self.norm = norm_layer(embed_dim)
+        else:
+            self.norm = None
+    def forward(self, x):
+        x = x.flatten(2).transpose(1, 2)  # B Ph*Pw C
+        if self.norm is not None:
+            x = self.norm(x)
+        return x
+    def flops(self):
+        flops = 0
+        H, W = self.img_size
+        if self.norm is not None:
+            flops += H * W * self.embed_dim
+        return flops
+class PatchUnEmbed(nn.Module):
+    r""" Image to Patch Unembedding
+    Args:
+        img_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Module, optional): Normalization layer. Default: None
+    """
+    def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+    def forward(self, x, x_size):
+        B, HW, C = x.shape
+        x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1])  # B Ph*Pw C
+        return x
+    def flops(self):
+        flops = 0
+        return flops
+class Upsample(nn.Sequential):
+    """Upsample module.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat):
+        m = []
+        if (scale & (scale - 1)) == 0:  # scale = 2^n
+            for _ in range(int(math.log(scale, 2))):
+                m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
+                m.append(nn.PixelShuffle(2))
+        elif scale == 3:
+            m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
+            m.append(nn.PixelShuffle(3))
+        else:
+            raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
+        super(Upsample, self).__init__(*m)
+class UpsampleOneStep(nn.Sequential):
+    """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)
+       Used in lightweight SR to save parameters.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of intermediate features.
+    """
+    def __init__(self, scale, num_feat, num_out_ch, input_resolution=None):
+        self.num_feat = num_feat
+        self.input_resolution = input_resolution
+        m = []
+        m.append(nn.Conv2d(num_feat, (scale ** 2) * num_out_ch, 3, 1, 1))
+        m.append(nn.PixelShuffle(scale))
+        super(UpsampleOneStep, self).__init__(*m)
+    def flops(self):
+        H, W = self.input_resolution
+        flops = H * W * self.num_feat * 3 * 9
+        return flops
+class Generator(nn.Module):
+    r""" SwinIR
+        A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer.
+    Args:
+        img_size (int | tuple(int)): Input image size. Default 64
+        patch_size (int | tuple(int)): Patch size. Default: 1
+        in_chans (int): Number of input image channels. Default: 3
+        embed_dim (int): Patch embedding dimension. Default: 96
+        depths (tuple(int)): Depth of each Swin Transformer layer.
+        num_heads (tuple(int)): Number of attention heads in different layers.
+        window_size (int): Window size. Default: 7
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
+        drop_rate (float): Dropout rate. Default: 0
+        attn_drop_rate (float): Attention dropout rate. Default: 0
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1
+        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
+        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
+        patch_norm (bool): If True, add normalization after patch embedding. Default: True
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
+        upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction
+        img_range: Image range. 1. or 255.
+        upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None
+        resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
+    """
+    def __init__(self, img_size=64, patch_size=1, in_chans=3, out_chans=3,
+                 embed_dim=96, depths=[6, 6, 6, 6], num_heads=[6, 6, 6, 6],
+                 window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None,
+                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
+                 norm_layer=nn.LayerNorm, ape=False, patch_norm=True,
+                 use_checkpoint=False, upscale=2, img_range=1., upsampler='', resi_connection='1conv',
+                 **kwargs):
+        super(Generator, self).__init__()
+        num_in_ch = in_chans
+        num_out_ch = out_chans
+        num_feat = 64
+        self.img_range = img_range
+        if in_chans == 3:
+            rgb_mean = (0.4488, 0.4371, 0.4040)
+            self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
+        else:
+            self.mean = torch.zeros(1, 1, 1, 1)
+        self.upscale = upscale
+        self.upsampler = upsampler
+        self.window_size = window_size
+        # -------------浅层特征提取------------ #
+        self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
+        # -------------深层特征提取------------ #
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.ape = ape
+        self.patch_norm = patch_norm
+        self.num_features = embed_dim
+        self.mlp_ratio = mlp_ratio
+        # -------------将图片划分为不重叠的Patch------------ #
+        self.patch_embed = PatchEmbed(
+            img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        num_patches = self.patch_embed.num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+        # -------------将重叠的Patch进行融合------------ #
+        self.patch_unembed = PatchUnEmbed(
+            img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        # -------------绝对位置编码------------ #
+        if self.ape:
+            self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
+            trunc_normal_(self.absolute_pos_embed, std=.02)
+        self.pos_drop = nn.Dropout(p=drop_rate)
+        # stochastic depth
+        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+        # build Residual Swin Transformer blocks (RSTB)
+        self.layers = nn.ModuleList()
+        for i_layer in range(self.num_layers):
+            layer = RSTB(dim=embed_dim,
+                         input_resolution=(patches_resolution[0],
+                                           patches_resolution[1]),
+                         depth=depths[i_layer],
+                         num_heads=num_heads[i_layer],
+                         window_size=window_size,
+                         mlp_ratio=self.mlp_ratio,
+                         qkv_bias=qkv_bias, qk_scale=qk_scale,
+                         drop=drop_rate, attn_drop=attn_drop_rate,
+                         drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],  # no impact on SR results
+                         norm_layer=norm_layer,
+                         downsample=None,
+                         use_checkpoint=use_checkpoint,
+                         img_size=img_size,
+                         patch_size=patch_size,
+                         resi_connection=resi_connection
+                         )
+            self.layers.append(layer)
+        self.norm = norm_layer(self.num_features)
+        # build the last conv layer in deep feature extraction
+        if resi_connection == '1conv':
+            self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
+        elif resi_connection == '3conv':
+            # to save parameters and memory
+            self.conv_after_body = nn.Sequential(nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1),
+                                                 nn.GELU(),
+                                                 nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0),
+                                                 nn.GELU(),
+                                                 nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1))
+        # -------------超分辨率重建模块------------ #
+        if self.upsampler == 'pixelshuffle':
+            # for classical SR
+            self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
+                                                      nn.GELU())
+            self.upsample = Upsample(upscale, num_feat)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+        elif self.upsampler == 'pixelshuffledirect':
+            # for lightweight SR (to save parameters)
+            self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch,
+                                            (patches_resolution[0], patches_resolution[1]))
+        elif self.upsampler == 'nearest+conv':
+            # for real-world SR (less artifacts)
+            assert self.upscale == 4, 'only support x4 now.'
+            self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
+                                                      nn.GELU())
+            self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
+            self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
+            self.lrelu = nn.GELU()
+        else:
+            # for image denoising and JPEG compression artifact reduction
+            self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+    @torch.jit.ignore
+    def no_weight_decay(self):
+        return {'absolute_pos_embed'}
+    @torch.jit.ignore
+    def no_weight_decay_keywords(self):
+        return {'relative_position_bias_table'}
+    def check_image_size(self, x):
+        _, _, h, w = x.size()
+        mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
+        mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
+        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect')
+        return x
+    def forward_features(self, x):
+        x_size = (x.shape[2], x.shape[3])
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x, x_size)
+        x = self.norm(x)  # B L C
+        x = self.patch_unembed(x, x_size)
+        return x
+    def forward(self, x):
+        H, W = x.shape[2:]
+        x = self.check_image_size(x)
+        self.mean = self.mean.type_as(x)
+        x = (x - self.mean) * self.img_range
+        if self.upsampler == 'pixelshuffle':
+            # for classical SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.conv_before_upsample(x)
+            x = self.conv_last(self.upsample(x))
+        elif self.upsampler == 'pixelshuffledirect':
+            # for lightweight SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.upsample(x)
+        elif self.upsampler == 'nearest+conv':
+            # for real-world SR
+            x = self.conv_first(x)
+            x = self.conv_after_body(self.forward_features(x)) + x
+            x = self.conv_before_upsample(x)
+            x = self.lrelu(self.conv_up1(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
+            x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
+            x = self.conv_last(self.lrelu(self.conv_hr(x)))
+        else:
+            # for image denoising and JPEG compression artifact reduction
+            x_first = self.conv_first(x)
+            res = self.conv_after_body(self.forward_features(x_first)) + x_first
+            x = self.conv_last(res)
+        x = x / self.img_range + self.mean
+        return x[:, :, :H*self.upscale, :W*self.upscale]
+    def flops(self):
+        flops = 0
+        H, W = self.patches_resolution
+        flops += H * W * 3 * self.embed_dim * 9
+        flops += self.patch_embed.flops()
+        for i, layer in enumerate(self.layers):
+            flops += layer.flops()
+        flops += H * W * 3 * self.embed_dim * self.embed_dim
+        flops += self.upsample.flops()
+        return flops
+class Discriminator(nn.Module):
+    def __init__(self):
+        super(Discriminator, self).__init__()
+        self.net = nn.Sequential(
+            nn.Conv2d(3, 64, kernel_size=3, padding=1),
+            nn.GELU(),
+            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
+            nn.GELU(),
+            nn.Conv2d(64, 128, kernel_size=3, padding=1),
+            nn.GELU(),
+            nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1),
+            nn.GELU(),
+            nn.Conv2d(128, 256, kernel_size=3, padding=1),
+            nn.GELU(),
+            nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1),
+            nn.GELU(),
+            nn.Conv2d(256, 512, kernel_size=3, padding=1),
+            nn.GELU(),
+            nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1),
+            nn.GELU(),
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(512, 1024, kernel_size=1),
+            nn.GELU(),
+            nn.Conv2d(1024, 1, kernel_size=1)
+        )
+    def forward(self, x):
+        batch_size = x.size(0)
+        return self.net(x).view(batch_size)
+def compute_gradient_penalty(D, real_samples, fake_samples):
+	alpha = torch.randn(real_samples.size(0), 1, 1, 1)
+	if torch.cuda.is_available():
+		alpha = alpha.cuda()
+	interpolates = (alpha * real_samples + ((1 - alpha) * fake_samples)).requires_grad_(True)
+	d_interpolates = D(interpolates)
+	fake = torch.ones(d_interpolates.size())
+	if torch.cuda.is_available():
+		fake = fake.cuda()
+	gradients = torch.autograd.grad(
+        outputs=d_interpolates,
+        inputs=interpolates,
+        grad_outputs=fake,
+        create_graph=True,
+        retain_graph=True,
+        only_inputs=True,
+    )[0]
+	gradients = gradients.view(gradients.size(0), -1)
+	gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean()
+	return gradient_penalty
+if __name__ == '__main__':
+    upscale = 1
+    window_size = 7
+    height = (110 // upscale // window_size + 1) * window_size
+    width = (110 // upscale // window_size + 1) * window_size
+    model = Generator(upscale=upscale, img_size=(height, width),
+                   window_size=window_size, img_range=1., depths=[6, 6, 6, 6],
+                   embed_dim=60, num_heads=[6, 6, 6, 6], mlp_ratio=4, upsampler='nearest+conv')
+    print(model)
+    # print(height, width, model.flops() / 1e9)
+    x = torch.randn((1, 3, height, width))
+    x = model(x)
+    print(x.shape)

nets/resnest/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .resnest import *
2	+ from .ablation import *

nets/resnest/ablation.py ADDED Viewed

	@@ -0,0 +1,106 @@

+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## Created by: Hang Zhang
+## Email: zhanghang0704@gmail.com
+## Copyright (c) 2020
+##
+## LICENSE file in the root directory of this source tree
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+"""ResNeSt ablation study models"""
+import torch
+from .resnet import ResNet, Bottleneck
+__all__ = ['resnest50_fast_1s1x64d', 'resnest50_fast_2s1x64d', 'resnest50_fast_4s1x64d',
+           'resnest50_fast_1s2x40d', 'resnest50_fast_2s2x40d', 'resnest50_fast_4s2x40d',
+           'resnest50_fast_1s4x24d']
+_url_format = 'https://s3.us-west-1.wasabisys.com/resnest/torch/{}-{}.pth'
+_model_sha256 = {name: checksum for checksum, name in [
+    ('d8fbf808', 'resnest50_fast_1s1x64d'),
+    ('44938639', 'resnest50_fast_2s1x64d'),
+    ('f74f3fc3', 'resnest50_fast_4s1x64d'),
+    ('32830b84', 'resnest50_fast_1s2x40d'),
+    ('9d126481', 'resnest50_fast_2s2x40d'),
+    ('41d14ed0', 'resnest50_fast_4s2x40d'),
+    ('d4a4f76f', 'resnest50_fast_1s4x24d'),
+    ]}
+def short_hash(name):
+    if name not in _model_sha256:
+        raise ValueError('Pretrained model for {name} is not available.'.format(name=name))
+    return _model_sha256[name][:8]
+resnest_model_urls = {name: _url_format.format(name, short_hash(name)) for
+    name in _model_sha256.keys()
+}
+def resnest50_fast_1s1x64d(pretrained=False, root='~/.encoding/models', **kwargs):
+    model = ResNet(Bottleneck, [3, 4, 6, 3],
+                   radix=1, groups=1, bottleneck_width=64,
+                   deep_stem=True, stem_width=32, avg_down=True,
+                   avd=True, avd_first=True, **kwargs)
+    if pretrained:
+        model.load_state_dict(torch.hub.load_state_dict_from_url(
+            resnest_model_urls['resnest50_fast_1s1x64d'], progress=True, check_hash=True))
+    return model
+def resnest50_fast_2s1x64d(pretrained=False, root='~/.encoding/models', **kwargs):
+    model = ResNet(Bottleneck, [3, 4, 6, 3],
+                   radix=2, groups=1, bottleneck_width=64,
+                   deep_stem=True, stem_width=32, avg_down=True,
+                   avd=True, avd_first=True, **kwargs)
+    if pretrained:
+        model.load_state_dict(torch.hub.load_state_dict_from_url(
+            resnest_model_urls['resnest50_fast_2s1x64d'], progress=True, check_hash=True))
+    return model
+def resnest50_fast_4s1x64d(pretrained=False, root='~/.encoding/models', **kwargs):
+    model = ResNet(Bottleneck, [3, 4, 6, 3],
+                   radix=4, groups=1, bottleneck_width=64,
+                   deep_stem=True, stem_width=32, avg_down=True,
+                   avd=True, avd_first=True, **kwargs)
+    if pretrained:
+        model.load_state_dict(torch.hub.load_state_dict_from_url(
+            resnest_model_urls['resnest50_fast_4s1x64d'], progress=True, check_hash=True))
+    return model
+def resnest50_fast_1s2x40d(pretrained=False, root='~/.encoding/models', **kwargs):
+    model = ResNet(Bottleneck, [3, 4, 6, 3],
+                   radix=1, groups=2, bottleneck_width=40,
+                   deep_stem=True, stem_width=32, avg_down=True,
+                   avd=True, avd_first=True, **kwargs)
+    if pretrained:
+        model.load_state_dict(torch.hub.load_state_dict_from_url(
+            resnest_model_urls['resnest50_fast_1s2x40d'], progress=True, check_hash=True))
+    return model
+def resnest50_fast_2s2x40d(pretrained=False, root='~/.encoding/models', **kwargs):
+    model = ResNet(Bottleneck, [3, 4, 6, 3],
+                   radix=2, groups=2, bottleneck_width=40,
+                   deep_stem=True, stem_width=32, avg_down=True,
+                   avd=True, avd_first=True, **kwargs)
+    if pretrained:
+        model.load_state_dict(torch.hub.load_state_dict_from_url(
+            resnest_model_urls['resnest50_fast_2s2x40d'], progress=True, check_hash=True))
+    return model
+def resnest50_fast_4s2x40d(pretrained=False, root='~/.encoding/models', **kwargs):
+    model = ResNet(Bottleneck, [3, 4, 6, 3],
+                   radix=4, groups=2, bottleneck_width=40,
+                   deep_stem=True, stem_width=32, avg_down=True,
+                   avd=True, avd_first=True, **kwargs)
+    if pretrained:
+        model.load_state_dict(torch.hub.load_state_dict_from_url(
+            resnest_model_urls['resnest50_fast_4s2x40d'], progress=True, check_hash=True))
+    return model
+def resnest50_fast_1s4x24d(pretrained=False, root='~/.encoding/models', **kwargs):
+    model = ResNet(Bottleneck, [3, 4, 6, 3],
+                   radix=1, groups=4, bottleneck_width=24,
+                   deep_stem=True, stem_width=32, avg_down=True,
+                   avd=True, avd_first=True, **kwargs)
+    if pretrained:
+        model.load_state_dict(torch.hub.load_state_dict_from_url(
+            resnest_model_urls['resnest50_fast_1s4x24d'], progress=True, check_hash=True))
+    return model

nets/resnest/resnest.py ADDED Viewed

	@@ -0,0 +1,60 @@

+"""
+@author: Jun Wang
+@date: 20210301
+@contact: jun21wangustc@gmail.com
+"""
+# based on:
+# https://github.com/zhanghang1989/ResNeSt/blob/master/resnest/torch/resnest.py
+import torch
+import torch.nn as nn
+from .resnet import ResNet, Bottleneck
+class Flatten(nn.Module):
+    def forward(self, input):
+        return input.view(input.size(0), -1)
+def l2_norm(input,axis=1):
+    norm = torch.norm(input,2,axis,True)
+    output = torch.div(input, norm)
+    return output
+class ResNeSt(nn.Module):
+    def __init__(self, num_layers=50, drop_ratio=0.4, feat_dim=512, out_h=7, out_w=7):
+        super(ResNeSt, self).__init__()
+        self.input_layer = nn.Sequential(nn.Conv2d(3, 64, (3, 3), 1, 1 ,bias=False),
+                                      nn.BatchNorm2d(64),
+                                      nn.PReLU(64))
+        self.output_layer = nn.Sequential(nn.BatchNorm2d(2048),
+                                       nn.Dropout(drop_ratio),
+                                       Flatten(),
+                                       nn.Linear(2048 * out_h * out_w, feat_dim),
+                                       nn.BatchNorm1d(feat_dim))
+        if num_layers == 50:
+            self.body = ResNet(Bottleneck, [3, 4, 6, 3],
+                                       radix=2, groups=1, bottleneck_width=64,
+                                       deep_stem=True, stem_width=32, avg_down=True,
+                                       avd=True, avd_first=False)
+        elif num_layers == 101:
+            self.body = ResNet(Bottleneck, [3, 4, 23, 3],
+                               radix=2, groups=1, bottleneck_width=64,
+                               deep_stem=True, stem_width=64, avg_down=True,
+                               avd=True, avd_first=False)
+        elif num_layers == 200:
+            self.body = ResNet(Bottleneck, [3, 24, 36, 3],
+                               radix=2, groups=1, bottleneck_width=64,
+                               deep_stem=True, stem_width=64, avg_down=True,
+                               avd=True, avd_first=False)
+        elif num_layers == 269:
+            self.body = ResNet(Bottleneck, [3, 30, 48, 8],
+                               radix=2, groups=1, bottleneck_width=64,
+                               deep_stem=True, stem_width=64, avg_down=True,
+                               avd=True, avd_first=False)
+        else:
+            pass
+    def forward(self, x):
+        x = self.input_layer(x)
+        x = self.body(x)
+        x = self.output_layer(x)
+        return l2_norm(x)

nets/resnest/resnet.py ADDED Viewed

	@@ -0,0 +1,310 @@

+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## Created by: Hang Zhang
+## Email: zhanghang0704@gmail.com
+## Copyright (c) 2020
+##
+## LICENSE file in the root directory of this source tree
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+"""ResNet variants"""
+import math
+import torch
+import torch.nn as nn
+from .splat import SplAtConv2d
+__all__ = ['ResNet', 'Bottleneck']
+class DropBlock2D(object):
+    def __init__(self, *args, **kwargs):
+        raise NotImplementedError
+class GlobalAvgPool2d(nn.Module):
+    def __init__(self):
+        """Global average pooling over the input's spatial dimensions"""
+        super(GlobalAvgPool2d, self).__init__()
+    def forward(self, inputs):
+        return nn.functional.adaptive_avg_pool2d(inputs, 1).view(inputs.size(0), -1)
+class Bottleneck(nn.Module):
+    """ResNet Bottleneck
+    """
+    # pylint: disable=unused-argument
+    expansion = 4
+    def __init__(self, inplanes, planes, stride=1, downsample=None,
+                 radix=1, cardinality=1, bottleneck_width=64,
+                 avd=False, avd_first=False, dilation=1, is_first=False,
+                 rectified_conv=False, rectify_avg=False,
+                 norm_layer=None, dropblock_prob=0.0, last_gamma=False):
+        super(Bottleneck, self).__init__()
+        group_width = int(planes * (bottleneck_width / 64.)) * cardinality
+        self.conv1 = nn.Conv2d(inplanes, group_width, kernel_size=1, bias=False)
+        self.bn1 = norm_layer(group_width)
+        self.dropblock_prob = dropblock_prob
+        self.radix = radix
+        self.avd = avd and (stride > 1 or is_first)
+        self.avd_first = avd_first
+        if self.avd:
+            self.avd_layer = nn.AvgPool2d(3, stride, padding=1)
+            stride = 1
+        if dropblock_prob > 0.0:
+            self.dropblock1 = DropBlock2D(dropblock_prob, 3)
+            if radix == 1:
+                self.dropblock2 = DropBlock2D(dropblock_prob, 3)
+            self.dropblock3 = DropBlock2D(dropblock_prob, 3)
+        if radix >= 1:
+            self.conv2 = SplAtConv2d(
+                group_width, group_width, kernel_size=3,
+                stride=stride, padding=dilation,
+                dilation=dilation, groups=cardinality, bias=False,
+                radix=radix, rectify=rectified_conv,
+                rectify_avg=rectify_avg,
+                norm_layer=norm_layer,
+                dropblock_prob=dropblock_prob)
+        elif rectified_conv:
+            from rfconv import RFConv2d
+            self.conv2 = RFConv2d(
+                group_width, group_width, kernel_size=3, stride=stride,
+                padding=dilation, dilation=dilation,
+                groups=cardinality, bias=False,
+                average_mode=rectify_avg)
+            self.bn2 = norm_layer(group_width)
+        else:
+            self.conv2 = nn.Conv2d(
+                group_width, group_width, kernel_size=3, stride=stride,
+                padding=dilation, dilation=dilation,
+                groups=cardinality, bias=False)
+            self.bn2 = norm_layer(group_width)
+        self.conv3 = nn.Conv2d(
+            group_width, planes * 4, kernel_size=1, bias=False)
+        self.bn3 = norm_layer(planes*4)
+        if last_gamma:
+            from torch.nn.init import zeros_
+            zeros_(self.bn3.weight)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.dilation = dilation
+        self.stride = stride
+    def forward(self, x):
+        residual = x
+        out = self.conv1(x)
+        out = self.bn1(out)
+        if self.dropblock_prob > 0.0:
+            out = self.dropblock1(out)
+        out = self.relu(out)
+        if self.avd and self.avd_first:
+            out = self.avd_layer(out)
+        out = self.conv2(out)
+        if self.radix == 0:
+            out = self.bn2(out)
+            if self.dropblock_prob > 0.0:
+                out = self.dropblock2(out)
+            out = self.relu(out)
+        if self.avd and not self.avd_first:
+            out = self.avd_layer(out)
+        out = self.conv3(out)
+        out = self.bn3(out)
+        if self.dropblock_prob > 0.0:
+            out = self.dropblock3(out)
+        if self.downsample is not None:
+            residual = self.downsample(x)
+        out += residual
+        out = self.relu(out)
+        return out
+class ResNet(nn.Module):
+    """ResNet Variants
+    Parameters
+    ----------
+    block : Block
+        Class for the residual block. Options are BasicBlockV1, BottleneckV1.
+    layers : list of int
+        Numbers of layers in each block
+    classes : int, default 1000
+        Number of classification classes.
+    dilated : bool, default False
+        Applying dilation strategy to pretrained ResNet yielding a stride-8 model,
+        typically used in Semantic Segmentation.
+    norm_layer : object
+        Normalization layer used in backbone network (default: :class:`mxnet.gluon.nn.BatchNorm`;
+        for Synchronized Cross-GPU BachNormalization).
+    Reference:
+        - He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.
+        - Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions."
+    """
+    # pylint: disable=unused-variable
+    def __init__(self, block, layers, radix=1, groups=1, bottleneck_width=64,
+                 num_classes=1000, dilated=False, dilation=1,
+                 deep_stem=False, stem_width=64, avg_down=False,
+                 rectified_conv=False, rectify_avg=False,
+                 avd=False, avd_first=False,
+                 final_drop=0.0, dropblock_prob=0,
+                 last_gamma=False, norm_layer=nn.BatchNorm2d):
+        """Store the block configuration and build the four residual stages.
+        Only ``layer1`` .. ``layer4`` are created. The stem and the
+        pooling / classifier code below is wrapped in bare string literals and
+        never runs, so ``num_classes`` and ``final_drop`` are not used here.
+        """
+        self.cardinality = groups
+        self.bottleneck_width = bottleneck_width
+        # ResNet-D params
+        # Starting channel count for the first stage; it is read by _make_layer.
+        self.inplanes = stem_width*2 if deep_stem else 64
+        self.avg_down = avg_down
+        self.last_gamma = last_gamma
+        # ResNeSt params
+        self.radix = radix
+        self.avd = avd
+        self.avd_first = avd_first
+        super(ResNet, self).__init__()
+        self.rectified_conv = rectified_conv
+        self.rectify_avg = rectify_avg
+        # conv_layer / conv_kwargs are only referenced by the disabled stem
+        # below; the rfconv import still runs when rectified_conv is set.
+        if rectified_conv:
+            from rfconv import RFConv2d
+            conv_layer = RFConv2d
+        else:
+            conv_layer = nn.Conv2d
+        conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {}
+        # Disabled stem: a string literal, not executed.
+        '''
+        if deep_stem:
+            self.conv1 = nn.Sequential(
+                conv_layer(3, stem_width, kernel_size=3, stride=2, padding=1, bias=False, **conv_kwargs),
+                norm_layer(stem_width),
+                nn.ReLU(inplace=True),
+                conv_layer(stem_width, stem_width, kernel_size=3, stride=1, padding=1, bias=False, **conv_kwargs),
+                norm_layer(stem_width),
+                nn.ReLU(inplace=True),
+                conv_layer(stem_width, stem_width*2, kernel_size=3, stride=1, padding=1, bias=False, **conv_kwargs),
+            )
+        else:
+            self.conv1 = conv_layer(3, 64, kernel_size=7, stride=2, padding=3,
+                                   bias=False, **conv_kwargs)
+        self.bn1 = norm_layer(self.inplanes)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        '''
+        # layer1 is built with stride=2; the commented line keeps the variant without an explicit stride.
+        #self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, is_first=False)
+        self.layer1 = self._make_layer(block, 64, layers[0], stride=2, norm_layer=norm_layer, is_first=False)
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
+        # layer3 / layer4 trade stride for dilation depending on `dilated` / `dilation`.
+        if dilated or dilation == 4:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
+                                           dilation=2, norm_layer=norm_layer,
+                                           dropblock_prob=dropblock_prob)
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
+                                           dilation=4, norm_layer=norm_layer,
+                                           dropblock_prob=dropblock_prob)
+        elif dilation==2:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
+                                           dilation=1, norm_layer=norm_layer,
+                                           dropblock_prob=dropblock_prob)
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
+                                           dilation=2, norm_layer=norm_layer,
+                                           dropblock_prob=dropblock_prob)
+        else:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
+                                           norm_layer=norm_layer,
+                                           dropblock_prob=dropblock_prob)
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
+                                           norm_layer=norm_layer,
+                                           dropblock_prob=dropblock_prob)
+        # Disabled pooling / classifier head and weight initialisation: a string literal, not executed.
+        '''
+        self.avgpool = GlobalAvgPool2d()
+        self.drop = nn.Dropout(final_drop) if final_drop > 0.0 else None
+        self.fc = nn.Linear(512 * block.expansion, num_classes)
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, norm_layer):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+        '''
+    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None,
+                    dropblock_prob=0.0, is_first=True):
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            down_layers = []
+            if self.avg_down:
+                if dilation == 1:
+                    down_layers.append(nn.AvgPool2d(kernel_size=stride, stride=stride,
+                                                    ceil_mode=True, count_include_pad=False))
+                else:
+                    down_layers.append(nn.AvgPool2d(kernel_size=1, stride=1,
+                                                    ceil_mode=True, count_include_pad=False))
+                down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion,
+                                             kernel_size=1, stride=1, bias=False))
+            else:
+                down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion,
+                                             kernel_size=1, stride=stride, bias=False))
+            down_layers.append(norm_layer(planes * block.expansion))
+            downsample = nn.Sequential(*down_layers)
+        layers = []
+        if dilation == 1 or dilation == 2:
+            layers.append(block(self.inplanes, planes, stride, downsample=downsample,
+                                radix=self.radix, cardinality=self.cardinality,
+                                bottleneck_width=self.bottleneck_width,
+                                avd=self.avd, avd_first=self.avd_first,
+                                dilation=1, is_first=is_first, rectified_conv=self.rectified_conv,
+                                rectify_avg=self.rectify_avg,
+                                norm_layer=norm_layer, dropblock_prob=dropblock_prob,
+                                last_gamma=self.last_gamma))
+        elif dilation == 4:
+            layers.append(block(self.inplanes, planes, stride, downsample=downsample,
+                                radix=self.radix, cardinality=self.cardinality,
+                                bottleneck_width=self.bottleneck_width,
+                                avd=self.avd, avd_first=self.avd_first,
+                                dilation=2, is_first=is_first, rectified_conv=self.rectified_conv,
+                                rectify_avg=self.rectify_avg,
+                                norm_layer=norm_layer, dropblock_prob=dropblock_prob,
+                                last_gamma=self.last_gamma))
+        else:
+            raise RuntimeError("=> unknown dilation size: {}".format(dilation))
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(self.inplanes, planes,
+                                radix=self.radix, cardinality=self.cardinality,
+                                bottleneck_width=self.bottleneck_width,
+                                avd=self.avd, avd_first=self.avd_first,
+                                dilation=dilation, rectified_conv=self.rectified_conv,
+                                rectify_avg=self.rectify_avg,
+                                norm_layer=norm_layer, dropblock_prob=dropblock_prob,
+                                last_gamma=self.last_gamma))
+        return nn.Sequential(*layers)
+    def forward(self, x):
+        '''
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        '''
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        '''
+        x = self.avgpool(x)
+        #x = x.view(x.size(0), -1)
+        x = torch.flatten(x, 1)
+        if self.drop:
+            x = self.drop(x)
+        x = self.fc(x)
+        '''
+        return x

nets/resnest/splat.py ADDED Viewed

	@@ -0,0 +1,99 @@

+"""Split-Attention"""
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn import Conv2d, Module, Linear, BatchNorm2d, ReLU
+from torch.nn.modules.utils import _pair
+__all__ = ['SplAtConv2d']
+class SplAtConv2d(Module):
+    """Split-Attention Conv2d
+    """
+    def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0),
+                 dilation=(1, 1), groups=1, bias=True,
+                 radix=2, reduction_factor=4,
+                 rectify=False, rectify_avg=False, norm_layer=None,
+                 dropblock_prob=0.0, **kwargs):
+        super(SplAtConv2d, self).__init__()
+        padding = _pair(padding)
+        self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
+        self.rectify_avg = rectify_avg
+        inter_channels = max(in_channels*radix//reduction_factor, 32)
+        self.radix = radix
+        self.cardinality = groups
+        self.channels = channels
+        self.dropblock_prob = dropblock_prob
+        if self.rectify:
+            from rfconv import RFConv2d
+            self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation,
+                                 groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs)
+        else:
+            self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation,
+                               groups=groups*radix, bias=bias, **kwargs)
+        self.use_bn = norm_layer is not None
+        if self.use_bn:
+            self.bn0 = norm_layer(channels*radix)
+        self.relu = ReLU(inplace=True)
+        self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
+        if self.use_bn:
+            self.bn1 = norm_layer(inter_channels)
+        self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality)
+        if dropblock_prob > 0.0:
+            self.dropblock = DropBlock2D(dropblock_prob, 3)
+        self.rsoftmax = rSoftMax(radix, groups)
+    def forward(self, x):
+        x = self.conv(x)
+        if self.use_bn:
+            x = self.bn0(x)
+        if self.dropblock_prob > 0.0:
+            x = self.dropblock(x)
+        x = self.relu(x)
+        batch, rchannel = x.shape[:2]
+        if self.radix > 1:
+            if torch.__version__ < '1.5':
+                splited = torch.split(x, int(rchannel//self.radix), dim=1)
+            else:
+                splited = torch.split(x, rchannel//self.radix, dim=1)
+            gap = sum(splited)
+        else:
+            gap = x
+        gap = F.adaptive_avg_pool2d(gap, 1)
+        gap = self.fc1(gap)
+        if self.use_bn:
+            gap = self.bn1(gap)
+        gap = self.relu(gap)
+        atten = self.fc2(gap)
+        atten = self.rsoftmax(atten).view(batch, -1, 1, 1)
+        if self.radix > 1:
+            if torch.__version__ < '1.5':
+                attens = torch.split(atten, int(rchannel//self.radix), dim=1)
+            else:
+                attens = torch.split(atten, rchannel//self.radix, dim=1)
+            out = sum([att*split for (att, split) in zip(attens, splited)])
+        else:
+            out = atten * x
+        return out.contiguous()
+class rSoftMax(nn.Module):
+    def __init__(self, radix, cardinality):
+        super().__init__()
+        self.radix = radix
+        self.cardinality = cardinality
+    def forward(self, x):
+        batch = x.size(0)
+        if self.radix > 1:
+            x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2)
+            x = F.softmax(x, dim=1)
+            x = x.reshape(batch, -1)
+        else:
+            x = torch.sigmoid(x)
+        return x

utils/__init__.py ADDED Viewed

File without changes

utils/callbacks.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import os
+import torch
+import matplotlib
+matplotlib.use('Agg')
+import scipy.signal
+from matplotlib import pyplot as plt
+from torch.utils.tensorboard import SummaryWriter
+class LossHistory():
+    def __init__(self, log_dir, model, input_shape):
+        self.log_dir    = log_dir
+        os.makedirs(self.log_dir)
+        self.writer     = SummaryWriter(self.log_dir)
+        try:
+            for m in model:
+                dummy_input = torch.randn(2, 3, input_shape[0], input_shape[1])
+                self.writer.add_graph(m, dummy_input)
+        except:
+            pass
+    def append_loss(self, epoch, **kwargs):
+        if not os.path.exists(self.log_dir):
+            os.makedirs(self.log_dir)
+        for key, value in kwargs.items():
+            if not hasattr(self, key):
+                setattr(self, key, [])
+            #---------------------------------#
+            #   为列表添加数值
+            #---------------------------------#
+            getattr(self, key).append(value)
+            #---------------------------------#
+            #   写入txt
+            #---------------------------------#
+            with open(os.path.join(self.log_dir, key + ".txt"), 'a') as f:
+                f.write(str(value))
+                f.write("\n")
+            #---------------------------------#
+            #   写入tensorboard
+            #---------------------------------#
+            self.writer.add_scalar(key, value, epoch)
+        self.loss_plot(**kwargs)
+    def loss_plot(self, **kwargs):
+        plt.figure()
+        for key, value in kwargs.items():
+            losses = getattr(self, key)
+            plt.plot(range(len(losses)), losses, linewidth = 2, label = key)
+        plt.grid(True)
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.legend(loc="upper right")
+        plt.savefig(os.path.join(self.log_dir, "epoch_loss.png"))
+        plt.cla()
+        plt.close("all")

utils/dataloader.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import numpy as np
+import torch
+from PIL import Image
+from torch.utils.data.dataset import Dataset
+from utils.utils import cvtColor, preprocess_input
+class CycleGanDataset(Dataset):
+    def __init__(self, annotation_lines_A, annotation_lines_B, input_shape):
+        super(CycleGanDataset, self).__init__()
+        self.annotation_lines_A = annotation_lines_A
+        self.annotation_lines_B = annotation_lines_B
+        self.length_A           = len(self.annotation_lines_A)
+        self.length_B           = len(self.annotation_lines_B)
+        self.input_shape        = input_shape
+    def __len__(self):
+        return max(self.length_A, self.length_B)
+    def __getitem__(self, index):
+        index_A = index % self.length_A
+        image_A = Image.open(self.annotation_lines_A[index_A].split(';')[1].split()[0])
+        image_A = cvtColor(image_A).resize([self.input_shape[1], self.input_shape[0]], Image.BICUBIC)
+        image_A = np.array(image_A, dtype=np.float32)
+        image_A = np.transpose(preprocess_input(image_A), (2, 0, 1))
+        index_B = index % self.length_B
+        image_B = Image.open(self.annotation_lines_B[index_B].split(';')[1].split()[0])
+        image_B = cvtColor(image_B).resize([self.input_shape[1], self.input_shape[0]], Image.BICUBIC)
+        image_B = np.array(image_B, dtype=np.float32)
+        image_B = np.transpose(preprocess_input(image_B), (2, 0, 1))
+        return image_A, image_B
+def CycleGan_dataset_collate(batch):
+    images_A = []
+    images_B = []
+    for image_A, image_B in batch:
+        images_A.append(image_A)
+        images_B.append(image_B)
+    images_A = torch.from_numpy(np.array(images_A, np.float32))
+    images_B = torch.from_numpy(np.array(images_B, np.float32))
+    return images_A, images_B

utils/utils.py ADDED Viewed

	@@ -0,0 +1,136 @@

+import itertools
+import math
+from functools import partial
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from PIL import Image
+#---------------------------------------------------------#
+#   将图像转换成RGB图像，防止灰度图在预测时报错。
+#   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
+#---------------------------------------------------------#
+def cvtColor(image):
+    if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
+        return image
+    else:
+        image = image.convert('RGB')
+        return image
+#---------------------------------------------------#
+#   对输入图像进行resize
+#---------------------------------------------------#
+def resize_image(image, size, letterbox_image):
+    iw, ih  = image.size
+    w, h    = size
+    if letterbox_image:
+        scale   = min(w/iw, h/ih)
+        nw      = int(iw*scale)
+        nh      = int(ih*scale)
+        image   = image.resize((nw,nh), Image.BICUBIC)
+        new_image = Image.new('RGB', size, (128, 128, 128))
+        new_image.paste(image, ((w-nw)//2, (h-nh)//2))
+        return new_image, nw, nh
+    else:
+        new_image = image.resize((w, h), Image.BICUBIC)
+        return new_image, None, None
+#----------------------------------------#
+#   Preprocess training images
+#----------------------------------------#
+def preprocess_input(x):
+    """Rescale ``x`` with ``(x / 255 - 0.5) / 0.5`` and return it.
+
+    Values in [0, 255] end up in [-1, 1]. The augmented assignments update a
+    NumPy array argument in place, so the caller's array is modified too.
+    """
+    x /= 255
+    x -= 0.5
+    x /= 0.5
+    return x
+def postprocess_output(x):
+    """Rescale ``x`` with ``(x * 0.5 + 0.5) * 255`` and return it.
+
+    Values in [-1, 1] end up in [0, 255]. The augmented assignments update a
+    NumPy array argument in place, so the caller's array is modified too.
+    """
+    x *= 0.5
+    x += 0.5
+    x *= 255
+    return x
+def show_result(num_epoch, G_model_A2B_train, G_model_B2A_train, images_A, images_B):
+    with torch.no_grad():
+        fake_image_B = G_model_A2B_train(images_A)
+        fake_image_A = G_model_B2A_train(images_B)
+        fig, ax = plt.subplots(2, 2)
+        ax = ax.flatten()
+        for j in itertools.product(range(4)):
+            ax[j].get_xaxis().set_visible(False)
+            ax[j].get_yaxis().set_visible(False)
+        ax[0].cla()
+        ax[0].imshow(np.transpose(np.uint8(postprocess_output(images_A.cpu().numpy()[0])), [1, 2, 0]))
+        ax[1].cla()
+        ax[1].imshow(np.transpose(np.clip(fake_image_B.cpu().numpy()[0] * 0.5 + 0.5, 0, 1), [1,2,0]))
+        ax[2].cla()
+        ax[2].imshow(np.transpose(np.uint8(postprocess_output(images_B.cpu().numpy()[0])), [1, 2, 0]))
+        ax[3].cla()
+        ax[3].imshow(np.transpose(np.clip(fake_image_A.cpu().numpy()[0] * 0.5 + 0.5, 0, 1), [1,2,0]))
+        label = 'Epoch {0}'.format(num_epoch)
+        fig.text(0.5, 0.04, label, ha='center')
+        plt.savefig("results/train_out/epoch_" + str(num_epoch) + "_results.png")
+        plt.close('all')  #避免内存泄漏
+def show_config(**kwargs):
+    print('Configurations:')
+    print('-' * 70)
+    print('|%25s | %40s|' % ('keys', 'values'))
+    print('-' * 70)
+    for key, value in kwargs.items():
+        print('|%25s | %40s|' % (str(key), str(value)))
+    print('-' * 70)
+#---------------------------------------------------#
+#   获得学习率
+#---------------------------------------------------#
+def get_lr(optimizer):
+    for param_group in optimizer.param_groups:
+        return param_group['lr']
+def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
+    def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
+        if iters <= warmup_total_iters:
+            # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
+            lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
+        elif iters >= total_iters - no_aug_iter:
+            lr = min_lr
+        else:
+            lr = min_lr + 0.5 * (lr - min_lr) * (
+                1.0 + math.cos(math.pi* (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iter))
+            )
+        return lr
+    def step_lr(lr, decay_rate, step_size, iters):
+        if step_size < 1:
+            raise ValueError("step_size must above 1.")
+        n       = iters // step_size
+        out_lr  = lr * decay_rate ** n
+        return out_lr
+    if lr_decay_type == "cos":
+        warmup_total_iters  = min(max(warmup_iters_ratio * total_iters, 1), 3)
+        warmup_lr_start     = max(warmup_lr_ratio * lr, 1e-6)
+        no_aug_iter         = min(max(no_aug_iter_ratio * total_iters, 1), 15)
+        func = partial(yolox_warm_cos_lr ,lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter)
+    else:
+        decay_rate  = (min_lr / lr) ** (1 / (step_num - 1))
+        step_size   = total_iters / step_num
+        func = partial(step_lr, lr, decay_rate, step_size)
+    return func
+def set_optimizer_lr(optimizer, lr_scheduler_func, epoch):
+    lr = lr_scheduler_func(epoch)
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr

utils/utils_fit.py ADDED Viewed

	@@ -0,0 +1,249 @@

+import os
+import torch
+import torch.nn.functional as F
+from tqdm import tqdm
+from nets.cyclegan import compute_gradient_penalty
+from utils.utils import get_lr, show_result
+def fit_one_epoch(G_model_A2B_train, G_model_B2A_train, D_model_A_train, D_model_B_train, G_model_A2B, G_model_B2A, D_model_A, D_model_B, VGG_feature_model, ResNeSt_model, loss_history,
+                G_optimizer, D_optimizer_A, D_optimizer_B, BCE_loss, L1_loss, Face_loss, epoch, epoch_step, gen, Epoch, cuda, fp16, scaler, save_period, save_dir, photo_save_step, local_rank=0):
+    """Train both generators and both discriminators for one epoch.
+
+    For every batch the generators are updated first (identity, adversarial,
+    cycle and face-similarity losses, plus a VGG feature loss in the non-fp16
+    branch), then discriminator A and discriminator B are updated with an
+    adversarial loss plus a gradient penalty weighted by 10. On
+    ``local_rank == 0`` a progress bar is shown, preview images are saved every
+    ``photo_save_step`` iterations, the epoch's mean losses are passed to
+    ``loss_history`` and the weights are written to ``save_dir``.
+
+    Args:
+        G_model_A2B_train, G_model_B2A_train, D_model_A_train, D_model_B_train:
+            modules used for the forward/backward passes.
+        G_model_A2B, G_model_B2A, D_model_A, D_model_B: modules whose
+            ``state_dict`` is saved and which are used for the previews
+            (presumably the unwrapped counterparts of the ``*_train``
+            modules - confirm against the caller).
+        VGG_feature_model: feature extractor for the L1 feature loss.
+        ResNeSt_model: embedding model applied to 112x112 resized images.
+        loss_history: object providing ``append_loss``.
+        G_optimizer, D_optimizer_A, D_optimizer_B: optimizers.
+        BCE_loss, L1_loss, Face_loss: loss callables; the face loss term is
+            ``mean(1 - Face_loss(a, b))``.
+        epoch: zero-based index of the current epoch.
+        epoch_step: number of iterations to run from ``gen``.
+        gen: iterable yielding ``(images_A, images_B)`` batches.
+        Epoch: total number of epochs (used for display and the final save).
+        cuda: move tensors to ``local_rank`` with ``.cuda`` when true.
+        fp16: use the autocast / ``scaler`` code path when true.
+        scaler: gradient scaler, only used when ``fp16`` is true.
+        save_period: save numbered checkpoints every this many epochs.
+        save_dir: directory for the weight files.
+        photo_save_step: iteration interval between preview images.
+        local_rank: process rank; only rank 0 logs and saves.
+    """
+    G_total_loss    = 0
+    D_total_loss_A  = 0
+    D_total_loss_B  = 0
+    if local_rank == 0:
+        print('Start Train')
+        pbar = tqdm(total=epoch_step,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3)
+    for iteration, batch in enumerate(gen):
+        if iteration >= epoch_step:
+            break
+        images_A, images_B = batch[0], batch[1]
+        batch_size  = images_A.size()[0]
+        # Targets for BCE_loss: ones for "real", zeros for "fake".
+        y_real      = torch.ones(batch_size)
+        y_fake      = torch.zeros(batch_size)
+        with torch.no_grad():
+            if cuda:
+                images_A, images_B, y_real, y_fake  = images_A.cuda(local_rank), images_B.cuda(local_rank), y_real.cuda(local_rank), y_fake.cuda(local_rank)
+        if not fp16:
+            #---------------------------------#
+            #   Train generators A2B and B2A
+            #---------------------------------#
+            G_optimizer.zero_grad()
+            # Identity losses: a generator fed an image of its target domain
+            # should return it unchanged.
+            Same_B          = G_model_A2B_train(images_B)
+            loss_identity_B = L1_loss(Same_B, images_B)
+            Same_A          = G_model_B2A_train(images_A)
+            loss_identity_A = L1_loss(Same_A, images_A)
+            # Adversarial loss A->B: each prediction is taken relative to the
+            # mean prediction on the opposite set; for the generator the
+            # targets are swapped compared with the discriminator update below.
+            fake_B          = G_model_A2B_train(images_A)
+            pred_real       = D_model_B_train(images_B)
+            pred_fake       = D_model_B_train(fake_B)
+            pred_rf         = pred_real - pred_fake.mean()
+            pred_fr         = pred_fake - pred_real.mean()
+            D_train_loss_rf = BCE_loss(pred_rf, y_fake)
+            D_train_loss_fr = BCE_loss(pred_fr, y_real)
+            loss_GAN_A2B    = (D_train_loss_rf + D_train_loss_fr) / 2
+            # Adversarial loss B->A, same construction.
+            fake_A          = G_model_B2A_train(images_B)
+            pred_real       = D_model_A_train(images_A)
+            pred_fake       = D_model_A_train(fake_A)
+            pred_rf         = pred_real - pred_fake.mean()
+            pred_fr         = pred_fake - pred_real.mean()
+            D_train_loss_rf = BCE_loss(pred_rf, y_fake)
+            D_train_loss_fr = BCE_loss(pred_fr, y_real)
+            loss_GAN_B2A    = (D_train_loss_rf + D_train_loss_fr) / 2
+            # Cycle A->B->A: pixel L1, VGG feature L1 and face-embedding loss.
+            recovered_A     = G_model_B2A_train(fake_B)
+            loss_cycle_ABA  = L1_loss(recovered_A, images_A)
+            loss_per_ABA    = L1_loss(VGG_feature_model(recovered_A), VGG_feature_model(images_A))
+            recovered_A_face  = F.interpolate(recovered_A, size=(112, 112), mode='bicubic', align_corners=True)
+            images_A_face     = F.interpolate(images_A, size=(112, 112), mode='bicubic', align_corners=True)
+            loss_face_ABA     = torch.mean(1. - Face_loss(ResNeSt_model(recovered_A_face), ResNeSt_model(images_A_face)))
+            # Cycle B->A->B, same three terms.
+            recovered_B     = G_model_A2B_train(fake_A)
+            loss_cycle_BAB  = L1_loss(recovered_B, images_B)
+            loss_per_BAB    = L1_loss(VGG_feature_model(recovered_B), VGG_feature_model(images_B))
+            recovered_B_face  = F.interpolate(recovered_B, size=(112, 112), mode='bicubic', align_corners=True)
+            images_B_face     = F.interpolate(images_B, size=(112, 112), mode='bicubic', align_corners=True)
+            loss_face_BAB     = torch.mean(1. - Face_loss(ResNeSt_model(recovered_B_face), ResNeSt_model(images_B_face)))
+            # Weights: identity 5, adversarial 1, VGG feature 2.5, cycle 10, face 5.
+            G_loss = loss_identity_A * 5.0 + loss_identity_B * 5.0 + loss_GAN_A2B + loss_GAN_B2A  + loss_per_ABA * 2.5 \
+                   + loss_per_BAB *2.5 + loss_cycle_ABA * 10.0 + loss_cycle_BAB * 10.0 + loss_face_ABA * 5 + loss_face_BAB * 5
+            G_loss.backward()
+            G_optimizer.step()
+            #---------------------------------#
+            #   Train discriminator A
+            #---------------------------------#
+            D_optimizer_A.zero_grad()
+            pred_real   = D_model_A_train(images_A)
+            # detach(): the discriminator update must not reach the generator.
+            pred_fake   = D_model_A_train(fake_A.detach())
+            pred_rf     = pred_real - pred_fake.mean()
+            pred_fr     = pred_fake - pred_real.mean()
+            D_train_loss_rf  = BCE_loss(pred_rf, y_real)
+            D_train_loss_fr  = BCE_loss(pred_fr, y_fake)
+            gradient_penalty = compute_gradient_penalty(D_model_A_train, images_A, fake_A.detach())
+            D_loss_A    = 10 * gradient_penalty + (D_train_loss_rf + D_train_loss_fr) / 2
+            D_loss_A.backward()
+            D_optimizer_A.step()
+            #---------------------------------#
+            #   Train discriminator B
+            #---------------------------------#
+            D_optimizer_B.zero_grad()
+            pred_real   = D_model_B_train(images_B)
+            pred_fake   = D_model_B_train(fake_B.detach())
+            pred_rf     = pred_real - pred_fake.mean()
+            pred_fr     = pred_fake - pred_real.mean()
+            D_train_loss_rf  = BCE_loss(pred_rf, y_real)
+            D_train_loss_fr  = BCE_loss(pred_fr, y_fake)
+            gradient_penalty = compute_gradient_penalty(D_model_B_train, images_B, fake_B.detach())
+            D_loss_B    = 10 * gradient_penalty + (D_train_loss_rf + D_train_loss_fr) / 2
+            D_loss_B.backward()
+            D_optimizer_B.step()
+        else:
+            from torch.cuda.amp import autocast
+            # NOTE(review): unlike the branch above, this branch has no VGG
+            # feature terms (loss_per_ABA / loss_per_BAB) in G_loss, and
+            # scaler.update() is called after each of the three optimizer
+            # steps - confirm both are intended.
+            #---------------------------------#
+            #   Train generators A2B and B2A
+            #---------------------------------#
+            with autocast():
+                G_optimizer.zero_grad()
+                Same_B          = G_model_A2B_train(images_B)
+                loss_identity_B = L1_loss(Same_B, images_B)
+                Same_A          = G_model_B2A_train(images_A)
+                loss_identity_A = L1_loss(Same_A, images_A)
+                fake_B          = G_model_A2B_train(images_A)
+                pred_real       = D_model_B_train(images_B)
+                pred_fake       = D_model_B_train(fake_B)
+                pred_rf         = pred_real - pred_fake.mean()
+                pred_fr         = pred_fake - pred_real.mean()
+                D_train_loss_rf = BCE_loss(pred_rf, y_fake)
+                D_train_loss_fr = BCE_loss(pred_fr, y_real)
+                loss_GAN_A2B    = (D_train_loss_rf + D_train_loss_fr) / 2
+                fake_A          = G_model_B2A_train(images_B)
+                pred_real       = D_model_A_train(images_A)
+                pred_fake       = D_model_A_train(fake_A)
+                pred_rf         = pred_real - pred_fake.mean()
+                pred_fr         = pred_fake - pred_real.mean()
+                D_train_loss_rf = BCE_loss(pred_rf, y_fake)
+                D_train_loss_fr = BCE_loss(pred_fr, y_real)
+                loss_GAN_B2A    = (D_train_loss_rf + D_train_loss_fr) / 2
+                recovered_A     = G_model_B2A_train(fake_B)
+                loss_cycle_ABA  = L1_loss(recovered_A, images_A)
+                recovered_A_face  = F.interpolate(recovered_A, size=(112, 112), mode='bicubic', align_corners=True)
+                images_A_face     = F.interpolate(images_A, size=(112, 112), mode='bicubic', align_corners=True)
+                loss_face_ABA     = torch.mean(1. - Face_loss(ResNeSt_model(recovered_A_face), ResNeSt_model(images_A_face)))
+                recovered_B     = G_model_A2B_train(fake_A)
+                loss_cycle_BAB  = L1_loss(recovered_B, images_B)
+                recovered_B_face  = F.interpolate(recovered_B, size=(112, 112), mode='bicubic', align_corners=True)
+                images_B_face     = F.interpolate(images_B, size=(112, 112), mode='bicubic', align_corners=True)
+                loss_face_BAB     = torch.mean(1. - Face_loss(ResNeSt_model(recovered_B_face), ResNeSt_model(images_B_face)))
+                G_loss = loss_identity_A * 5.0 + loss_identity_B * 5.0 + loss_GAN_A2B + loss_GAN_B2A \
+                    + loss_cycle_ABA * 10.0 + loss_cycle_BAB * 10.0 + loss_face_ABA * 5 + loss_face_BAB * 5
+            #----------------------#
+            #   Backward pass
+            #----------------------#
+            scaler.scale(G_loss).backward()
+            scaler.step(G_optimizer)
+            scaler.update()
+            #---------------------------------#
+            #   Train discriminator A
+            #---------------------------------#
+            with autocast():
+                D_optimizer_A.zero_grad()
+                pred_real   = D_model_A_train(images_A)
+                pred_fake   = D_model_A_train(fake_A.detach())
+                pred_rf     = pred_real - pred_fake.mean()
+                pred_fr     = pred_fake - pred_real.mean()
+                D_train_loss_rf  = BCE_loss(pred_rf, y_real)
+                D_train_loss_fr  = BCE_loss(pred_fr, y_fake)
+                gradient_penalty = compute_gradient_penalty(D_model_A_train, images_A, fake_A.detach())
+                D_loss_A    = 10 * gradient_penalty + (D_train_loss_rf + D_train_loss_fr) / 2
+            #----------------------#
+            #   Backward pass
+            #----------------------#
+            scaler.scale(D_loss_A).backward()
+            scaler.step(D_optimizer_A)
+            scaler.update()
+            #---------------------------------#
+            #   Train discriminator B
+            #---------------------------------#
+            with autocast():
+                D_optimizer_B.zero_grad()
+                pred_real   = D_model_B_train(images_B)
+                pred_fake   = D_model_B_train(fake_B.detach())
+                pred_rf     = pred_real - pred_fake.mean()
+                pred_fr     = pred_fake - pred_real.mean()
+                D_train_loss_rf  = BCE_loss(pred_rf, y_real)
+                D_train_loss_fr  = BCE_loss(pred_fr, y_fake)
+                gradient_penalty = compute_gradient_penalty(D_model_B_train, images_B, fake_B.detach())
+                D_loss_B    = 10 * gradient_penalty + (D_train_loss_rf + D_train_loss_fr) / 2
+            #----------------------#
+            #   Backward pass
+            #----------------------#
+            scaler.scale(D_loss_B).backward()
+            scaler.step(D_optimizer_B)
+            scaler.update()
+        # Running sums; the progress bar shows the mean over iterations so far.
+        G_total_loss    += G_loss.item()
+        D_total_loss_A  += D_loss_A.item()
+        D_total_loss_B  += D_loss_B.item()
+        if local_rank == 0:
+            pbar.set_postfix(**{'G_loss'    : G_total_loss / (iteration + 1),
+                                'D_loss_A'  : D_total_loss_A / (iteration + 1),
+                                'D_loss_B'  : D_total_loss_B / (iteration + 1),
+                                'lr'        : get_lr(G_optimizer)})
+            pbar.update(1)
+            if iteration % photo_save_step == 0:
+                show_result(epoch + 1, G_model_A2B, G_model_B2A, images_A, images_B)
+    # Epoch means. NOTE(review): divides by epoch_step even if ``gen`` yielded
+    # fewer batches, and raises ZeroDivisionError for epoch_step == 0.
+    G_total_loss    = G_total_loss / epoch_step
+    D_total_loss_A  = D_total_loss_A / epoch_step
+    D_total_loss_B  = D_total_loss_B / epoch_step
+    if local_rank == 0:
+        pbar.close()
+        print('Epoch:'+ str(epoch + 1) + '/' + str(Epoch))
+        print('G Loss: %.4f || D Loss A: %.4f || D Loss B: %.4f  ' % (G_total_loss, D_total_loss_A, D_total_loss_B))
+        loss_history.append_loss(epoch + 1, G_total_loss = G_total_loss, D_total_loss_A = D_total_loss_A, D_total_loss_B = D_total_loss_B)
+        #-----------------------------------------------#
+        #   Save weights
+        #-----------------------------------------------#
+        # Numbered checkpoints every save_period epochs and on the last epoch;
+        # the "last_epoch_weights" files are overwritten every epoch.
+        if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch:
+            torch.save(G_model_A2B.state_dict(), os.path.join(save_dir, 'G_model_A2B_Epoch%d-GLoss%.4f-DALoss%.4f-DBLoss%.4f.pth'%(epoch + 1, G_total_loss, D_total_loss_A, D_total_loss_B)))
+            torch.save(G_model_B2A.state_dict(), os.path.join(save_dir, 'G_model_B2A_Epoch%d-GLoss%.4f-DALoss%.4f-DBLoss%.4f.pth'%(epoch + 1, G_total_loss, D_total_loss_A, D_total_loss_B)))
+            torch.save(D_model_A.state_dict(), os.path.join(save_dir, 'D_model_A_Epoch%d-GLoss%.4f-DALoss%.4f-DBLoss%.4f.pth'%(epoch + 1, G_total_loss, D_total_loss_A, D_total_loss_B)))
+            torch.save(D_model_B.state_dict(), os.path.join(save_dir, 'D_model_B_Epoch%d-GLoss%.4f-DALoss%.4f-DBLoss%.4f.pth'%(epoch + 1, G_total_loss, D_total_loss_A, D_total_loss_B)))
+        torch.save(G_model_A2B.state_dict(), os.path.join(save_dir, "G_model_A2B_last_epoch_weights.pth"))
+        torch.save(G_model_B2A.state_dict(), os.path.join(save_dir, "G_model_B2A_last_epoch_weights.pth"))
+        torch.save(D_model_A.state_dict(), os.path.join(save_dir, "D_model_A_last_epoch_weights.pth"))
+        torch.save(D_model_B.state_dict(), os.path.join(save_dir, "D_model_B_last_epoch_weights.pth"))