Raid41 committed on
Commit
0654ee4
·
1 Parent(s): a3edef0

Upload 33 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ figures/color2.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *.ipynb
2
+ *.pth
3
+ *.zip
4
+
5
+ __pycache__/
6
+ temp_colorization/
7
+
8
+ static/temp_images/
__pycache__/colorizator.cpython-39.pyc ADDED
Binary file (2.7 kB). View file
 
colorizator.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
from torchvision.transforms import ToTensor
import numpy as np

from networks.models import Colorizer
from denoising.denoiser import FFDNetDenoiser
from utils.utils import resize_pad

10
class MangaColorizator:
    """High-level manga colorization pipeline: denoise -> resize/pad -> colorize.

    Usage: call set_image(), optionally update_hint() for user color hints,
    then colorize() to obtain the colorized result.
    """

    def __init__(self, device, generator_path = 'networks/generator.zip', extractor_path = 'networks/extractor.pth'):
        # Generator weights are loaded directly onto the target device and the
        # network is switched to eval mode for inference.
        # NOTE(review): extractor_path is accepted but never used in this
        # class -- presumably the extractor is loaded inside Colorizer; confirm.
        self.colorizer = Colorizer().to(device)
        self.colorizer.generator.load_state_dict(torch.load(generator_path, map_location = device))
        self.colorizer = self.colorizer.eval()

        self.denoiser = FFDNetDenoiser(device)

        # Per-image state: populated by set_image(), read by colorize().
        self.current_image = None
        self.current_hint = None
        self.current_pad = None

        self.device = device

    def set_image(self, image, size = 576, apply_denoise = True, denoise_sigma = 25, transform = ToTensor()):
        """Prepare an input image for colorization.

        Args:
            image: input image array (format consumed by the denoiser and resize_pad).
            size: target size handed to resize_pad; must be divisible by 32.
            apply_denoise: run FFDNet denoising before resizing.
            denoise_sigma: noise level for the denoiser (0-255 scale).
            transform: callable converting the array to a CHW float tensor.

        Raises:
            RuntimeError: if size is not a multiple of 32.
        """
        if (size % 32 != 0):
            raise RuntimeError("size is not divisible by 32")

        if apply_denoise:
            image = self.denoiser.get_denoised_image(image, sigma = denoise_sigma)

        # resize_pad returns the resized image plus the padding amounts, which
        # colorize() crops back off (bottom rows / right columns).
        image, self.current_pad = resize_pad(image, size)
        self.current_image = transform(image).unsqueeze(0).to(self.device)
        # Hint tensor has 4 channels (3 color + 1 mask); zeros mean "no hints".
        self.current_hint = torch.zeros(1, 4, self.current_image.shape[2], self.current_image.shape[3]).float().to(self.device)

    def update_hint(self, hint, mask):
        '''
        Set user color hints for the currently loaded image.

        Args:
            hint: numpy.ndarray with shape (self.current_image.shape[2], self.current_image.shape[3], 3)
            mask: numpy.ndarray with shape (self.current_image.shape[2], self.current_image.shape[3])
        '''

        # Integer hints are treated as 8-bit and brought into [0, 1] first.
        if issubclass(hint.dtype.type, np.integer):
            hint = hint.astype('float32') / 255

        # Map [0, 1] -> [-1, 1]; colorize() applies the inverse (x*0.5 + 0.5).
        hint = (hint - 0.5) / 0.5
        hint = torch.FloatTensor(hint).permute(2, 0, 1)
        mask = torch.FloatTensor(np.expand_dims(mask, 0))

        # Zero hint colors outside the mask and append the mask as 4th channel.
        self.current_hint = torch.cat([hint * mask, mask], 0).unsqueeze(0).to(self.device)

    def colorize(self):
        """Run the generator on the prepared image.

        Returns:
            HxWxC float numpy array in [0, 1] with the padding added by
            set_image() cropped away.
        """
        with torch.no_grad():
            fake_color, _ = self.colorizer(torch.cat([self.current_image, self.current_hint], 1))
            fake_color = fake_color.detach()

        # Map the network's [-1, 1] output back to [0, 1].
        result = fake_color[0].detach().cpu().permute(1, 2, 0) * 0.5 + 0.5

        # Crop the bottom/right padding recorded by resize_pad (0 = no pad).
        if self.current_pad[0] != 0:
            result = result[:-self.current_pad[0]]
        if self.current_pad[1] != 0:
            result = result[:, :-self.current_pad[1]]

        return result.numpy()
denoising/__pycache__/denoiser.cpython-39.pyc ADDED
Binary file (3.48 kB). View file
 
denoising/__pycache__/functions.cpython-39.pyc ADDED
Binary file (3.6 kB). View file
 
denoising/__pycache__/models.cpython-39.pyc ADDED
Binary file (3.49 kB). View file
 
denoising/__pycache__/utils.cpython-39.pyc ADDED
Binary file (2.39 kB). View file
 
denoising/denoiser.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Denoise an image with the FFDNet denoising method
3
+
4
+ Copyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes.fr>
5
+
6
+ This program is free software: you can use, modify and/or
7
+ redistribute it under the terms of the GNU General Public
8
+ License as published by the Free Software Foundation, either
9
+ version 3 of the License, or (at your option) any later
10
+ version. You should have received a copy of this license along
11
+ this program. If not, see <http://www.gnu.org/licenses/>.
12
+ """
13
+ import os
14
+ import argparse
15
+ import time
16
+
17
+
18
+ import numpy as np
19
+ import cv2
20
+ import torch
21
+ import torch.nn as nn
22
+ from torch.autograd import Variable
23
+ from .models import FFDNet
24
+ from .utils import normalize, variable_to_cv2_image, remove_dataparallel_wrapper, is_rgb
25
+
26
class FFDNetDenoiser:
    """Inference wrapper around the FFDNet denoising network.

    Loads pretrained RGB or grayscale weights and exposes
    get_denoised_image() for single-image denoising.
    """

    def __init__(self, _device, _sigma = 25, _weights_dir = 'denoising/models/', _in_ch = 3):
        # Noise level is stored normalized to [0, 1]; callers pass 0-255 scale.
        self.sigma = _sigma / 255
        self.weights_dir = _weights_dir
        self.channels = _in_ch
        self.device = _device

        self.model = FFDNet(num_input_channels = _in_ch)
        self.load_weights()
        self.model.eval()


    def load_weights(self):
        """Load pretrained weights for the configured channel count.

        The checkpoints apparently come from a DataParallel model (keys carry
        a 'module.' prefix): on CUDA the model is wrapped in nn.DataParallel
        so the keys match; on CPU the prefix is stripped instead.
        """
        weights_name = 'net_rgb.pth' if self.channels == 3 else 'net_gray.pth'
        weights_path = os.path.join(self.weights_dir, weights_name)
        if self.device == 'cuda':
            state_dict = torch.load(weights_path, map_location=torch.device('cpu'))
            device_ids = [0]
            self.model = nn.DataParallel(self.model, device_ids=device_ids).cuda()
        else:
            state_dict = torch.load(weights_path, map_location='cpu')
            # CPU mode: remove the DataParallel wrapper
            state_dict = remove_dataparallel_wrapper(state_dict)
        self.model.load_state_dict(state_dict)

    def get_denoised_image(self, imorig, sigma = None):
        """Denoise one image with FFDNet.

        Args:
            imorig: numpy image, HxW, HxWx1, HxWx3 or HxWx4 (alpha dropped);
                values in [0, 1] or [0, 255] (scale is auto-detected).
            sigma: optional noise level on the 0-255 scale; defaults to the
                value given at construction.

        Returns:
            8-bit image array as produced by variable_to_cv2_image.
        """

        # Sigma arrives on the 0-255 scale; the network expects [0, 1].
        if sigma is not None:
            cur_sigma = sigma / 255
        else:
            cur_sigma = self.sigma

        # Promote grayscale (HxW or HxWx1) to three channels.
        if len(imorig.shape) < 3 or imorig.shape[2] == 1:
            imorig = np.repeat(np.expand_dims(imorig, 2), 3, 2)

        # Drop a possible alpha channel.
        imorig = imorig[..., :3]

        # Cap the longest side at 1200 px to bound memory and run time.
        if (max(imorig.shape[0], imorig.shape[1]) > 1200):
            ratio = max(imorig.shape[0], imorig.shape[1]) / 1200
            imorig = cv2.resize(imorig, (int(imorig.shape[1] / ratio), int(imorig.shape[0] / ratio)), interpolation = cv2.INTER_AREA)

        # HWC -> CHW for torch.
        imorig = imorig.transpose(2, 0, 1)

        # Heuristic: values above 1.2 mean the image is on the 0-255 scale.
        if (imorig.max() > 1.2):
            imorig = normalize(imorig)
        imorig = np.expand_dims(imorig, 0)

        # Handle odd sizes: FFDNet downsamples 2x2, so spatial dims must be
        # even; duplicate the last row/column and crop it back afterwards.
        expanded_h = False
        expanded_w = False
        sh_im = imorig.shape
        if sh_im[2]%2 == 1:
            expanded_h = True
            imorig = np.concatenate((imorig, imorig[:, :, -1, :][:, :, np.newaxis, :]), axis=2)

        if sh_im[3]%2 == 1:
            expanded_w = True
            imorig = np.concatenate((imorig, imorig[:, :, :, -1][:, :, :, np.newaxis]), axis=3)


        imorig = torch.Tensor(imorig)


        # Sets data type according to CPU or GPU modes
        if self.device == 'cuda':
            dtype = torch.cuda.FloatTensor
        else:
            dtype = torch.FloatTensor

        imnoisy = imorig.clone()


        with torch.no_grad():
            imorig, imnoisy = imorig.type(dtype), imnoisy.type(dtype)
            nsigma = torch.FloatTensor([cur_sigma]).type(dtype)


            # Estimate noise and subtract it to the input image
            im_noise_estim = self.model(imnoisy, nsigma)
            outim = torch.clamp(imnoisy-im_noise_estim, 0., 1.)

        # Undo the even-size padding added above.
        if expanded_h:
            imorig = imorig[:, :, :-1, :]
            outim = outim[:, :, :-1, :]
            imnoisy = imnoisy[:, :, :-1, :]

        if expanded_w:
            imorig = imorig[:, :, :, :-1]
            outim = outim[:, :, :, :-1]
            imnoisy = imnoisy[:, :, :, :-1]

        return variable_to_cv2_image(outim)
denoising/functions.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Functions implementing custom NN layers
3
+
4
+ Copyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes.fr>
5
+
6
+ This program is free software: you can use, modify and/or
7
+ redistribute it under the terms of the GNU General Public
8
+ License as published by the Free Software Foundation, either
9
+ version 3 of the License, or (at your option) any later
10
+ version. You should have received a copy of this license along
11
+ this program. If not, see <http://www.gnu.org/licenses/>.
12
+ """
13
+ import torch
14
+ from torch.autograd import Function, Variable
15
+
16
def concatenate_input_noise_map(input, noise_sigma):
    r"""First FFDNet layer: 2x2 space-to-depth plus a per-sample noise map.

    Each CxHxW image in the batch becomes a (4*C + C) x H/2 x W/2 tensor:
    the first C channels hold a constant noise-level map and the remaining
    4*C channels hold the de-interleaved 2x2 patches of the input.

    Args:
        input: batch of CxHxW images
        noise_sigma: per-sample noise levels (length-N tensor)
    """
    # noise_sigma is a list of length batch_size
    batch, channels, height, width = input.size()
    scale = 2
    patch_size = scale * scale              # 4 pixels per 2x2 patch
    out_channels = patch_size * channels
    out_h, out_w = height // scale, width // scale
    offsets = ((0, 0), (0, 1), (1, 0), (1, 1))

    # Allocate the space-to-depth buffer on the same device as the input.
    if 'cuda' in input.type():
        mosaic = torch.cuda.FloatTensor(batch, out_channels, out_h, out_w).fill_(0)
    else:
        mosaic = torch.FloatTensor(batch, out_channels, out_h, out_w).fill_(0)

    # One constant C x H/2 x W/2 noise level map per sample.
    noise_map = noise_sigma.view(batch, 1, 1, 1).repeat(1, channels, out_h, out_w)

    # De-interleave: pixel (dy, dx) of every 2x2 patch gets its own channel slot.
    for slot, (dy, dx) in enumerate(offsets):
        mosaic[:, slot:out_channels:patch_size, :, :] = input[:, :, dy::scale, dx::scale]

    return torch.cat((noise_map, mosaic), 1)
54
+
55
class UpSampleFeaturesFunction(Function):
    r"""Extends PyTorch's modules by implementing a torch.autograd.Function.
    This class implements the forward and backward methods of the last layer
    of FFDNet. It basically performs the inverse of
    concatenate_input_noise_map(): it converts each of the images of a
    batch of size CxH/2xW/2 to images of size C/4xHxW
    """
    @staticmethod
    def forward(ctx, input):
        N, Cin, Hin, Win = input.size()
        dtype = input.type()
        sca = 2
        sca2 = sca*sca
        Cout = Cin//sca2    # every 4 input channels fold into 1 output channel
        Hout = Hin*sca
        Wout = Win*sca
        # Offsets of the 4 pixels inside each 2x2 output patch.
        idxL = [[0, 0], [0, 1], [1, 0], [1, 1]]

        assert (Cin%sca2 == 0), 'Invalid input dimensions: number of channels should be divisible by 4'

        # Depth-to-space: channel group idx fills patch position idxL[idx].
        result = torch.zeros((N, Cout, Hout, Wout)).type(dtype)
        for idx in range(sca2):
            result[:, :, idxL[idx][0]::sca, idxL[idx][1]::sca] = input[:, idx:Cin:sca2, :, :]

        return result

    @staticmethod
    def backward(ctx, grad_output):
        # Gradient of depth-to-space is the inverse space-to-depth gather.
        # NOTE(review): uses the legacy .data/Variable autograd API; harmless
        # for inference but worth modernizing if training is ever needed.
        N, Cg_out, Hg_out, Wg_out = grad_output.size()
        dtype = grad_output.data.type()
        sca = 2
        sca2 = sca*sca
        Cg_in = sca2*Cg_out
        Hg_in = Hg_out//sca
        Wg_in = Wg_out//sca
        idxL = [[0, 0], [0, 1], [1, 0], [1, 1]]

        # Build output
        grad_input = torch.zeros((N, Cg_in, Hg_in, Wg_in)).type(dtype)
        # Populate output
        for idx in range(sca2):
            grad_input[:, idx:Cg_in:sca2, :, :] = grad_output.data[:, :, idxL[idx][0]::sca, idxL[idx][1]::sca]

        return Variable(grad_input)

# Alias functions
upsamplefeatures = UpSampleFeaturesFunction.apply
denoising/models.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Definition of the FFDNet model and its custom layers
3
+
4
+ Copyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes.fr>
5
+
6
+ This program is free software: you can use, modify and/or
7
+ redistribute it under the terms of the GNU General Public
8
+ License as published by the Free Software Foundation, either
9
+ version 3 of the License, or (at your option) any later
10
+ version. You should have received a copy of this license along
11
+ this program. If not, see <http://www.gnu.org/licenses/>.
12
+ """
13
+ import torch.nn as nn
14
+ from torch.autograd import Variable
15
+ import denoising.functions as functions
16
+
17
class UpSampleFeatures(nn.Module):
    r"""Final FFDNet layer: depth-to-space upsampling of the DnCNN output."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return functions.upsamplefeatures(x)
24
+
25
class IntermediateDnCNN(nn.Module):
    r"""DnCNN-style trunk forming the middle part of FFDNet.

    Stacks Conv-ReLU, then (num_conv_layers - 2) Conv-BN-ReLU groups, then a
    final Conv. Only two input widths are accepted: 5 (grayscale: 4
    space-to-depth channels + 1 noise map) and 15 (RGB: 12 + 3).
    """

    def __init__(self, input_features, middle_features, num_conv_layers):
        super(IntermediateDnCNN, self).__init__()
        self.kernel_size = 3
        self.padding = 1
        self.input_features = input_features
        self.num_conv_layers = num_conv_layers
        self.middle_features = middle_features
        if self.input_features == 5:
            self.output_features = 4 #Grayscale image
        elif self.input_features == 15:
            self.output_features = 12 #RGB image
        else:
            raise Exception('Invalid number of input features')

        conv_kwargs = dict(kernel_size=self.kernel_size, padding=self.padding, bias=False)
        layers = [
            nn.Conv2d(self.input_features, self.middle_features, **conv_kwargs),
            nn.ReLU(inplace=True),
        ]
        for _ in range(self.num_conv_layers - 2):
            layers += [
                nn.Conv2d(self.middle_features, self.middle_features, **conv_kwargs),
                nn.BatchNorm2d(self.middle_features),
                nn.ReLU(inplace=True),
            ]
        layers.append(nn.Conv2d(self.middle_features, self.output_features, **conv_kwargs))
        # NOTE: the 'itermediate' typo is kept on purpose -- pretrained
        # state_dict keys are prefixed with this attribute name.
        self.itermediate_dncnn = nn.Sequential(*layers)

    def forward(self, x):
        return self.itermediate_dncnn(x)
67
+
68
class FFDNet(nn.Module):
    r"""Implements the FFDNet architecture.

    Pipeline: 2x2 space-to-depth + noise map -> DnCNN trunk -> depth-to-space.
    The network outputs a noise estimate at the input resolution; callers
    subtract it from the noisy image.
    """
    def __init__(self, num_input_channels):
        super(FFDNet, self).__init__()
        self.num_input_channels = num_input_channels
        if self.num_input_channels == 1:
            # Grayscale image
            self.num_feature_maps = 64
            self.num_conv_layers = 15
            self.downsampled_channels = 5    # 4 space-to-depth + 1 noise map
            self.output_features = 4
        elif self.num_input_channels == 3:
            # RGB image
            self.num_feature_maps = 96
            self.num_conv_layers = 12
            self.downsampled_channels = 15   # 12 space-to-depth + 3 noise map
            self.output_features = 12
        else:
            raise Exception('Invalid number of input features')

        self.intermediate_dncnn = IntermediateDnCNN(\
                input_features=self.downsampled_channels,\
                middle_features=self.num_feature_maps,\
                num_conv_layers=self.num_conv_layers)
        self.upsamplefeatures = UpSampleFeatures()

    def forward(self, x, noise_sigma):
        # Downsample 2x and append the per-sample noise level map.
        concat_noise_x = functions.concatenate_input_noise_map(x.data, noise_sigma.data)
        concat_noise_x = Variable(concat_noise_x)
        h_dncnn = self.intermediate_dncnn(concat_noise_x)
        # Back to full resolution: the predicted noise residual.
        pred_noise = self.upsamplefeatures(h_dncnn)
        return pred_noise
denoising/models/net_rgb.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fe98bfd2ac870b15f360661b1c4789eecefc6dc2e4462842a0dd15e149a0433
3
+ size 3435567
denoising/utils.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Different utilities such as orthogonalization of weights, initialization of
3
+ loggers, etc
4
+
5
+ Copyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes.fr>
6
+
7
+ This program is free software: you can use, modify and/or
8
+ redistribute it under the terms of the GNU General Public
9
+ License as published by the Free Software Foundation, either
10
+ version 3 of the License, or (at your option) any later
11
+ version. You should have received a copy of this license along
12
+ this program. If not, see <http://www.gnu.org/licenses/>.
13
+ """
14
+ import numpy as np
15
+ import cv2
16
+
17
+
18
def variable_to_cv2_image(varim):
    r"""Convert a 4-D torch Variable/tensor into an 8-bit OpenCV-style image.

    Args:
        varim: tensor of shape (1, C, H, W) with values expected in [0, 1];
            only the first element of the batch is converted.

    Returns:
        numpy uint8 array: HxW for C == 1, HxWx3 (BGR) for C == 3.

    Raises:
        Exception: for any other channel count.
    """
    nchannels = varim.size()[1]
    frame = varim.data.cpu().numpy()[0]
    if nchannels == 1:
        res = (frame[0, :] * 255.).clip(0, 255).astype(np.uint8)
    elif nchannels == 3:
        # Network output is RGB; OpenCV convention is BGR.
        bgr = cv2.cvtColor(frame.transpose(1, 2, 0), cv2.COLOR_RGB2BGR)
        res = (bgr * 255.).clip(0, 255).astype(np.uint8)
    else:
        raise Exception('Number of color channels not supported')
    return res
34
+
35
+
36
def normalize(data):
    """Scale 8-bit pixel data from [0, 255] down to float32 in [0, 1]."""
    scaled = data / 255.
    return np.float32(scaled)
38
+
39
def remove_dataparallel_wrapper(state_dict):
    r"""Converts a DataParallel model to a normal one by removing the "module."
    wrapper in the module dictionary

    Args:
        state_dict: a torch.nn.DataParallel state dictionary

    Returns:
        An OrderedDict with the same values whose keys have a leading
        'module.' stripped. Keys without the prefix are kept unchanged
        (the original sliced off the first 7 characters unconditionally,
        which corrupted already-unwrapped keys).
    """
    from collections import OrderedDict

    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        # Only strip when the DataParallel prefix is actually present.
        name = key[7:] if key.startswith('module.') else key
        new_state_dict[name] = value

    return new_state_dict
54
+
55
def is_rgb(im_path):
    r"""Return True if the image at *im_path* has genuinely distinct color channels.

    A 3-channel image whose channels are numerically (almost) identical is
    treated as grayscale. Diagnostic information is printed as a side effect.
    """
    from skimage.io import imread
    rgb = False
    im = imread(im_path)
    if (len(im.shape) == 3):
        # Channels that are elementwise close mean a grayscale image saved as RGB.
        if not(np.allclose(im[...,0], im[...,1]) and np.allclose(im[...,2], im[...,1])):
            rgb = True
    print("rgb: {}".format(rgb))
    print("im shape: {}".format(im.shape))
    return rgb
figures/bw1.jpg ADDED
figures/bw2.jpg ADDED
figures/bw3.jpg ADDED
figures/bw4.jpg ADDED
figures/bw5.jpg ADDED
figures/bw6.jpg ADDED
figures/color1.png ADDED
figures/color2.png ADDED

Git LFS Details

  • SHA256: 6900649446215ef2f21e0f36cdb0f9001907e7934e552c920b5bf0fa38e8dd10
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
figures/color3.png ADDED
figures/color4.png ADDED
figures/color5.png ADDED
figures/color6.png ADDED
inference.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import sys
4
+
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+
8
+ from colorizator import MangaColorizator
9
+
10
def process_image(image, colorizator, args):
    """Colorize one image array using the CLI options carried by *args*."""
    colorizator.set_image(image, args.size, args.denoiser, args.denoiser_sigma)
    return colorizator.colorize()
14
+
15
def colorize_single_image(image_path, save_path, colorizator, args):
    """Read an image from disk, colorize it and write the result.

    Returns True on completion; exceptions propagate to the caller.
    """
    image = plt.imread(image_path)
    colorization = process_image(image, colorizator, args)
    plt.imsave(save_path, colorization)
    return True
24
+
25
+
26
def colorize_images(target_path, colorizator, args):
    """Colorize every file directly inside args.path, saving PNGs to target_path.

    Subdirectories are skipped; output files always get a .png extension.
    """
    for image_name in os.listdir(args.path):
        file_path = os.path.join(args.path, image_name)
        if os.path.isdir(file_path):
            continue

        name, ext = os.path.splitext(image_name)
        if ext != '.png':
            image_name = name + '.png'

        print(file_path)
        save_path = os.path.join(target_path, image_name)
        colorize_single_image(file_path, save_path, colorizator, args)
43
+
44
def parse_args():
    """Build and parse the command-line options for the inference script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--path", required=True)
    parser.add_argument("-gen", "--generator", default="networks/generator.zip")
    parser.add_argument("-ext", "--extractor", default="networks/extractor.pth")
    # Boolean switches: -g enables the GPU, -nd disables denoising.
    parser.add_argument("-g", "--gpu", dest="gpu", action="store_true")
    parser.add_argument("-nd", "--no_denoise", dest="denoiser", action="store_false")
    parser.add_argument("-ds", "--denoiser_sigma", type=int, default=25)
    parser.add_argument("-s", "--size", type=int, default=576)
    parser.set_defaults(gpu=False, denoiser=True)
    return parser.parse_args()
58
+
59
+
60
if __name__ == "__main__":

    args = parse_args()

    # Select the torch device from the CLI flag.
    device = 'cuda' if args.gpu else 'cpu'

    colorizer = MangaColorizator(device, args.generator, args.extractor)

    if os.path.isdir(args.path):
        # Batch mode: results go into a 'colorization' subfolder of the input.
        colorization_path = os.path.join(args.path, 'colorization')
        if not os.path.exists(colorization_path):
            os.makedirs(colorization_path)

        colorize_images(colorization_path, colorizer, args)

    elif os.path.isfile(args.path):

        split = os.path.splitext(args.path)

        # BUG FIX: the jpeg entry was misspelled ',jpeg', so '.jpeg' files
        # were rejected with 'Wrong format'.
        if split[1].lower() in ('.jpg', '.png', '.jpeg'):
            new_image_path = split[0] + '_colorized' + '.png'

            colorize_single_image(args.path, new_image_path, colorizer, args)
        else:
            print('Wrong format')
    else:
        print('Wrong path')
networks/__pycache__/extractor.cpython-39.pyc ADDED
Binary file (3.82 kB). View file
 
networks/__pycache__/models.cpython-39.pyc ADDED
Binary file (10.7 kB). View file
 
networks/extractor.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import math
4
+
5
+ '''https://github.com/blandocs/Tag2Pix/blob/master/model/pretrained.py'''
6
+
7
+ # Pretrained version
8
class Selayer(nn.Module):
    """Squeeze-and-Excitation gate: rescales channels by a learned attention vector.

    Channel descriptors come from global average pooling and pass through a
    16x bottleneck (two 1x1 convs) followed by a sigmoid.
    """

    def __init__(self, inplanes):
        super(Selayer, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(inplanes, inplanes // 16, kernel_size=1, stride=1)
        self.conv2 = nn.Conv2d(inplanes // 16, inplanes, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Squeeze to (N, C, 1, 1), excite through the bottleneck, then gate x.
        gate = self.global_avgpool(x)
        gate = self.relu(self.conv1(gate))
        gate = self.sigmoid(self.conv2(gate))
        return x * gate
25
+
26
+
27
class BottleneckX_Origin(nn.Module):
    """SE-ResNeXt bottleneck: 1x1 reduce -> grouped 3x3 -> 1x1 expand -> SE gate.

    Channel plan: inplanes -> planes*2 -> planes*2 (grouped by cardinality)
    -> planes*4, followed by a Selayer and a residual connection that goes
    through `downsample` when the shapes differ.
    """
    expansion = 4

    def __init__(self, inplanes, planes, cardinality, stride=1, downsample=None):
        super(BottleneckX_Origin, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes * 2)

        # The grouped conv carries the stride; this is where downsampling happens.
        self.conv2 = nn.Conv2d(planes * 2, planes * 2, kernel_size=3, stride=stride,
                               padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(planes * 2)

        self.conv3 = nn.Conv2d(planes * 2, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)

        self.selayer = Selayer(planes * 4)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        # No ReLU after bn3: SE gating and the residual addition come first.
        out = self.conv3(out)
        out = self.bn3(out)

        out = self.selayer(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
71
+
72
class SEResNeXt_Origin(nn.Module):
    """Truncated SE-ResNeXt encoder returning four feature scales.

    Unlike the classification network, only the stem and the first three
    residual stages are kept (no maxpool/avgpool/fc head); forward() returns
    all intermediate activations for use as a feature extractor.

    NOTE(review): num_classes is accepted but never used in this truncated
    variant.
    """
    def __init__(self, block, layers, input_channels=3, cardinality=32, num_classes=1000):
        super(SEResNeXt_Origin, self).__init__()
        self.cardinality = cardinality
        self.inplanes = 64      # running channel count, advanced by _make_layer
        self.input_channels = input_channels

        # Stem: 7x7 stride-2 conv (without the usual ResNet maxpool).
        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)

        # He-style init for convs, constant init for batch norms.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack `blocks` bottlenecks; only the first may downsample."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # 1x1 projection to match the residual branch's shape.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, self.cardinality, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, self.cardinality))

        return nn.Sequential(*layers)

    def forward(self, x):
        # Returns (stem, stage1, stage2, stage3) activations, finest first.
        x = self.conv1(x)
        x = self.bn1(x)
        x1 = self.relu(x)

        x2 = self.layer1(x1)

        x3 = self.layer2(x2)

        x4 = self.layer3(x3)

        return x1, x2, x3, x4
networks/generator.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9bc204753267a38eeb43d262fee9c96fb1b5035fd89bbdf567fc69e5d3ebd1
3
+ size 202088636
networks/models.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import torchvision.models as M
5
+ import math
6
+ from torch import Tensor
7
+ from torch.nn import Parameter
8
+
9
+ from .extractor import SEResNeXt_Origin, BottleneckX_Origin
10
+
11
+ '''https://github.com/orashi/AlacGAN/blob/master/models/standard.py'''
12
+
13
def l2normalize(v, eps=1e-12):
    """Scale *v* to unit L2 norm; eps guards against division by zero."""
    denom = v.norm() + eps
    return v / denom
15
+
16
+
17
class SpectralNorm(nn.Module):
    """Spectral normalization wrapper for a module's weight parameter.

    On every forward pass the wrapped module's `weight` is replaced by
    w_bar / sigma, where sigma estimates the weight matrix's largest singular
    value via power iteration. Three parameters are registered on the wrapped
    module: <name>_u and <name>_v (singular-vector estimates, not trained)
    and <name>_bar (the raw weight).
    """
    def __init__(self, module, name='weight', power_iterations=1):
        super(SpectralNorm, self).__init__()
        self.module = module
        self.name = name
        self.power_iterations = power_iterations
        if not self._made_params():
            self._make_params()

    def _update_u_v(self):
        # Run power iteration and set module.<name> to the normalized weight.
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")

        # The weight is viewed as a (height x -1) matrix for the iteration.
        height = w.data.shape[0]
        for _ in range(self.power_iterations):
            v.data = l2normalize(torch.mv(torch.t(w.view(height,-1).data), u.data))
            u.data = l2normalize(torch.mv(w.view(height,-1).data, v.data))

        # sigma = torch.dot(u.data, torch.mv(w.view(height,-1).data, v.data))
        sigma = u.dot(w.view(height, -1).mv(v))
        setattr(self.module, self.name, w / sigma.expand_as(w))

    def _made_params(self):
        # EAFP check: True if the u/v/bar parameters were already registered.
        try:
            u = getattr(self.module, self.name + "_u")
            v = getattr(self.module, self.name + "_v")
            w = getattr(self.module, self.name + "_bar")
            return True
        except AttributeError:
            return False


    def _make_params(self):
        # Replace module.<name> by the three spectral-norm parameters.
        w = getattr(self.module, self.name)
        height = w.data.shape[0]
        width = w.view(height, -1).data.shape[1]

        u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
        v = Parameter(w.data.new(width).normal_(0, 1), requires_grad=False)
        u.data = l2normalize(u.data)
        v.data = l2normalize(v.data)
        w_bar = Parameter(w.data)

        # The raw weight moves to <name>_bar; module.<name> is recomputed in
        # _update_u_v() before every forward.
        del self.module._parameters[self.name]

        self.module.register_parameter(self.name + "_u", u)
        self.module.register_parameter(self.name + "_v", v)
        self.module.register_parameter(self.name + "_bar", w_bar)


    def forward(self, *args):
        self._update_u_v()
        return self.module.forward(*args)
71
+
72
class Selayer(nn.Module):
    """Squeeze-and-Excitation channel attention (16x bottleneck, sigmoid gate)."""

    def __init__(self, inplanes):
        super(Selayer, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(inplanes, inplanes // 16, kernel_size=1, stride=1)
        self.conv2 = nn.Conv2d(inplanes // 16, inplanes, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Squeeze: per-channel global statistics, shape (N, C, 1, 1).
        attention = self.global_avgpool(x)
        # Excite: bottleneck MLP implemented with 1x1 convolutions.
        attention = self.relu(self.conv1(attention))
        attention = self.sigmoid(self.conv2(attention))
        # Broadcast multiply rescales each channel of x.
        return x * attention
89
+
90
class SelayerSpectr(nn.Module):
    """Squeeze-and-Excitation gate with spectral normalization on both 1x1 convs.

    Identical structure to Selayer, but each conv is wrapped in SpectralNorm
    (used in the spectrally-normalized blocks below).
    """
    def __init__(self, inplanes):
        super(SelayerSpectr, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = SpectralNorm(nn.Conv2d(inplanes, inplanes // 16, kernel_size=1, stride=1))
        self.conv2 = SpectralNorm(nn.Conv2d(inplanes // 16, inplanes, kernel_size=1, stride=1))
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Squeeze -> excite -> sigmoid gate, then rescale x per channel.
        out = self.global_avgpool(x)
        out = self.conv1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.sigmoid(out)

        return x * out
107
+
108
class ResNeXtBottleneck(nn.Module):
    """ResNeXt bottleneck with SE gating, dilation support and avg-pool shortcut.

    The grouped conv uses kernel_size = 2 + stride (3 at stride 1, 4 at
    stride 2) with padding = dilation = dilate. When stride != 1 the identity
    path is average-pooled instead of projected with a 1x1 conv.
    """
    def __init__(self, in_channels=256, out_channels=256, stride=1, cardinality=32, dilate=1):
        super(ResNeXtBottleneck, self).__init__()
        D = out_channels // 2   # bottleneck width
        self.out_channels = out_channels
        self.conv_reduce = nn.Conv2d(in_channels, D, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv_conv = nn.Conv2d(D, D, kernel_size=2 + stride, stride=stride, padding=dilate, dilation=dilate,
                                   groups=cardinality,
                                   bias=False)
        self.conv_expand = nn.Conv2d(D, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.shortcut = nn.Sequential()
        if stride != 1:
            # Parameter-free downsampling on the identity path.
            self.shortcut.add_module('shortcut',
                                     nn.AvgPool2d(2, stride=2))

        self.selayer = Selayer(out_channels)

    def forward(self, x):
        bottleneck = self.conv_reduce.forward(x)
        bottleneck = F.leaky_relu(bottleneck, 0.2, True)
        bottleneck = self.conv_conv.forward(bottleneck)
        bottleneck = F.leaky_relu(bottleneck, 0.2, True)
        # No activation after the expand conv; the SE gate follows directly.
        bottleneck = self.conv_expand.forward(bottleneck)
        bottleneck = self.selayer(bottleneck)

        x = self.shortcut.forward(x)
        return x + bottleneck
135
+
136
class SpectrResNeXtBottleneck(nn.Module):
    """Spectrally-normalized variant of ResNeXtBottleneck.

    Same topology (1x1 reduce -> grouped conv -> 1x1 expand -> SE gate ->
    avg-pool shortcut), with every conv wrapped in SpectralNorm and the SE
    layer replaced by SelayerSpectr.
    """
    def __init__(self, in_channels=256, out_channels=256, stride=1, cardinality=32, dilate=1):
        super(SpectrResNeXtBottleneck, self).__init__()
        D = out_channels // 2   # bottleneck width
        self.out_channels = out_channels
        self.conv_reduce = SpectralNorm(nn.Conv2d(in_channels, D, kernel_size=1, stride=1, padding=0, bias=False))
        self.conv_conv = SpectralNorm(nn.Conv2d(D, D, kernel_size=2 + stride, stride=stride, padding=dilate, dilation=dilate,
                                                groups=cardinality,
                                                bias=False))
        self.conv_expand = SpectralNorm(nn.Conv2d(D, out_channels, kernel_size=1, stride=1, padding=0, bias=False))
        self.shortcut = nn.Sequential()
        if stride != 1:
            # Parameter-free downsampling on the identity path.
            self.shortcut.add_module('shortcut',
                                     nn.AvgPool2d(2, stride=2))

        self.selayer = SelayerSpectr(out_channels)

    def forward(self, x):
        bottleneck = self.conv_reduce.forward(x)
        bottleneck = F.leaky_relu(bottleneck, 0.2, True)
        bottleneck = self.conv_conv.forward(bottleneck)
        bottleneck = F.leaky_relu(bottleneck, 0.2, True)
        # No activation after the expand conv; the SE gate follows directly.
        bottleneck = self.conv_expand.forward(bottleneck)
        bottleneck = self.selayer(bottleneck)

        x = self.shortcut.forward(x)
        return x + bottleneck
163
+
164
class FeatureConv(nn.Module):
    """Feature-projection head: three 3x3 convolutions (the middle one
    strided) mapping ``input_dim`` channels to ``output_dim`` channels and
    halving the spatial resolution.

    Args:
        input_dim: number of input channels.
        output_dim: number of output channels.
        no_bn: when True (the default, matching the previously hard-coded
            local ``no_bn = True``) no BatchNorm layers are inserted; pass
            False to add BatchNorm after the first two convolutions.
            Note: ``no_bn=False`` shifts the ``nn.Sequential`` indices, so
            checkpoints saved with the default layout will not load into it.
    """

    def __init__(self, input_dim=512, output_dim=512, no_bn=True):
        super(FeatureConv, self).__init__()

        seq = []
        seq.append(nn.Conv2d(input_dim, output_dim, kernel_size=3, stride=1, padding=1, bias=False))
        if not no_bn:
            seq.append(nn.BatchNorm2d(output_dim))
        seq.append(nn.ReLU(inplace=True))
        # The stride-2 conv performs the single spatial downsampling step.
        seq.append(nn.Conv2d(output_dim, output_dim, kernel_size=3, stride=2, padding=1, bias=False))
        if not no_bn:
            seq.append(nn.BatchNorm2d(output_dim))
        seq.append(nn.ReLU(inplace=True))
        seq.append(nn.Conv2d(output_dim, output_dim, kernel_size=3, stride=1, padding=1, bias=False))
        seq.append(nn.ReLU(inplace=True))

        self.network = nn.Sequential(*seq)

    def forward(self, x):
        """Apply the conv stack; output shape is (N, output_dim, H/2, W/2)."""
        return self.network(x)
184
+
185
class Generator(nn.Module):
    """Colorization generator: a SE-ResNeXt sketch encoder plus a shallow
    multi-scale hint encoder (to0..to3), fused through dilated-ResNeXt
    "tunnel" stages with PixelShuffle upsampling, producing a color image
    and an auxiliary decoder output.

    NOTE(review): ``self.to4`` and ``self.tunnel1`` are constructed but
    never used in ``forward``; they are presumably kept so pretrained
    checkpoints with those keys still load — confirm before removing.
    """

    def __init__(self, ngf=64):
        # ngf is accepted but unused in this implementation.
        super(Generator, self).__init__()

        # Single-channel (grayscale) SE-ResNeXt backbone returning four feature scales.
        self.encoder = SEResNeXt_Origin(BottleneckX_Origin, [3, 4, 6, 3], num_classes= 370, input_channels=1)

        # Shallow encoder over the full input.
        # NOTE(review): to0 takes 5 channels — presumably grayscale + 4 hint/mask
        # channels; confirm against the caller.
        self.to0 = self._make_encoder_block_first(5, 32)
        self.to1 = self._make_encoder_block(32, 64)
        self.to2 = self._make_encoder_block(64, 92)
        self.to3 = self._make_encoder_block(92, 128)
        self.to4 = self._make_encoder_block(128, 256)

        # Auxiliary decoder applied to the tunnel4 output (second return value of forward).
        self.deconv_for_decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 3, stride=2, padding=1, output_padding=1), # output is 64 * 64
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1), # output is 128 * 128
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(64, 32, 3, stride=1, padding=1, output_padding=0), # output is 256 * 256
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(32, 3, 3, stride=1, padding=1, output_padding=0), # output is 256 * 256
            nn.Tanh(),
        )

        # Deepest fusion stage: 20 non-dilated ResNeXt blocks at 512 channels.
        tunnel4 = nn.Sequential(*[ResNeXtBottleneck(512, 512, cardinality=32, dilate=1) for _ in range(20)])


        # Fuses encoder x4 (1024 ch) with the hint features (128 ch), then upsamples 2x.
        self.tunnel4 = nn.Sequential(nn.Conv2d(1024 + 128, 512, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel4,
                                     nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     ) # 64

        # Dilation pyramid (1, 2, 4, 2, 1) used by tunnel3 and tunnel2 to grow
        # the receptive field without further downsampling.
        depth = 2
        tunnel = [ResNeXtBottleneck(256, 256, cardinality=32, dilate=1) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(256, 256, cardinality=32, dilate=2) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(256, 256, cardinality=32, dilate=4) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(256, 256, cardinality=32, dilate=2),
                   ResNeXtBottleneck(256, 256, cardinality=32, dilate=1)]
        tunnel3 = nn.Sequential(*tunnel)

        # Fuses tunnel4 output (512 ch) with encoder x3 (256 ch), upsamples 2x.
        self.tunnel3 = nn.Sequential(nn.Conv2d(512 + 256, 256, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel3,
                                     nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     ) # 128

        tunnel = [ResNeXtBottleneck(128, 128, cardinality=32, dilate=1) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(128, 128, cardinality=32, dilate=2) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(128, 128, cardinality=32, dilate=4) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(128, 128, cardinality=32, dilate=2),
                   ResNeXtBottleneck(128, 128, cardinality=32, dilate=1)]
        tunnel2 = nn.Sequential(*tunnel)

        # Fuses tunnel3 output with encoder x2 (256 ch) and x1 (64 ch), upsamples 2x.
        self.tunnel2 = nn.Sequential(nn.Conv2d(128 + 256 + 64, 128, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel2,
                                     nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     )

        tunnel = [ResNeXtBottleneck(64, 64, cardinality=16, dilate=1)]
        tunnel += [ResNeXtBottleneck(64, 64, cardinality=16, dilate=2)]
        tunnel += [ResNeXtBottleneck(64, 64, cardinality=16, dilate=4)]
        tunnel += [ResNeXtBottleneck(64, 64, cardinality=16, dilate=2),
                   ResNeXtBottleneck(64, 64, cardinality=16, dilate=1)]
        tunnel1 = nn.Sequential(*tunnel)

        # NOTE(review): unused in forward (see class docstring).
        self.tunnel1 = nn.Sequential(nn.Conv2d(64 + 32, 64, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel1,
                                     nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     )

        # Final head: fuse with the full-resolution hint features x0 and emit 3 channels.
        self.exit = nn.Sequential(nn.Conv2d(64 + 32, 32, kernel_size=3, stride=1, padding=1),
                                  nn.LeakyReLU(0.2, True),
                                  nn.Conv2d(32, 3, kernel_size= 1, stride = 1, padding = 0))


    def _make_encoder_block(self, inplanes, planes):
        # Downsampling encoder stage: stride-2 conv followed by a stride-1 conv.
        return nn.Sequential(
            nn.Conv2d(inplanes, planes, 3, 2, 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(planes, planes, 3, 1, 1),
            nn.LeakyReLU(0.2),
        )

    def _make_encoder_block_first(self, inplanes, planes):
        # First encoder stage: keeps full resolution (both convs stride 1).
        return nn.Sequential(
            nn.Conv2d(inplanes, planes, 3, 1, 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(planes, planes, 3, 1, 1),
            nn.LeakyReLU(0.2),
        )

    def forward(self, sketch):
        """Return (colorized image in [-1, 1], auxiliary decoder output).

        ``sketch`` feeds the shallow hint encoder whole; only its first
        channel goes to the SE-ResNeXt backbone.
        """

        # Shallow multi-scale features from the full input.
        x0 = self.to0(sketch)
        aux_out = self.to1(x0)
        aux_out = self.to2(aux_out)
        aux_out = self.to3(aux_out)

        # Backbone features from the grayscale channel only.
        x1, x2, x3, x4 = self.encoder(sketch[:, 0:1])

        out = self.tunnel4(torch.cat([x4, aux_out], 1))



        x = self.tunnel3(torch.cat([out, x3], 1))

        x = self.tunnel2(torch.cat([x, x2, x1], 1))


        x = torch.tanh(self.exit(torch.cat([x, x0], 1)))

        # Auxiliary reconstruction branch driven by the deepest fused features.
        decoder_output = self.deconv_for_decoder(out)

        return x, decoder_output
309
+
310
+
311
class Colorizer(nn.Module):
    """Thin inference wrapper that owns the Generator and forwards to it."""

    def __init__(self):
        super(Colorizer, self).__init__()
        self.generator = Generator()

    def forward(self, x, extractor_grad = False):
        """Return the generator's (fake, guide) pair for input x.

        ``extractor_grad`` is accepted for interface compatibility but is
        not used by this implementation.
        """
        return self.generator(x)
readme.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Automatic colorization
2
+
3
+ 1. Download the [generator](https://drive.google.com/file/d/1qmxUEKADkEM4iYLp1fpPLLKnfZ6tcF-t/view?usp=sharing) and [denoiser](https://drive.google.com/file/d/161oyQcYpdkVdw8gKz_MA8RD-Wtg9XDp3/view?usp=sharing) weights. Put the generator (and, if available, extractor) weights in `networks`, and the denoiser weights in `denoising/models`.
4
+ 2. To colorize image or folder of images, use the following command:
5
+ ```
6
+ $ python inference.py -p "path to file or folder"
7
+ ```
8
+
9
+ | Input (black & white) | Colorized result |
10
+ |------------|-------------|
11
+ | <img src="figures/bw1.jpg" width="512"> | <img src="figures/color1.png" width="512"> |
12
+ | <img src="figures/bw2.jpg" width="512"> | <img src="figures/color2.png" width="512"> |
13
+ | <img src="figures/bw3.jpg" width="512"> | <img src="figures/color3.png" width="512"> |
14
+ | <img src="figures/bw4.jpg" width="512"> | <img src="figures/color4.png" width="512"> |
15
+ | <img src="figures/bw5.jpg" width="512"> | <img src="figures/color5.png" width="512"> |
16
+ | <img src="figures/bw6.jpg" width="512"> | <img src="figures/color6.png" width="512"> |
utils/__pycache__/utils.cpython-39.pyc ADDED
Binary file (1.06 kB). View file
 
utils/utils.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+
4
def resize_pad(img, size = 256):
    """Resize an image for the colorizer and pad its long side to a multiple of 32.

    Landscape images are resized to a fixed height of ``int(size * 1.5)`` and
    padded on the right; portrait/square images are resized to width ``size``
    and padded on the bottom. Padding uses the 'maximum' mode (bright border,
    which suits white manga page backgrounds).

    Args:
        img: H x W, H x W x 1, H x W x 3 or H x W x 4 ndarray.
        size: target short-ish side; should be divisible by 32.

    Returns:
        (image, pad): single-channel H x W x 1 ndarray and a
        (bottom_pad, right_pad)-style tuple — (0, right) for landscape,
        (bottom, 0) for portrait — so callers can crop the padding off.
    """
    # Normalize channel layout: promote grayscale to 3 channels, drop alpha.
    if len(img.shape) == 2:
        img = np.expand_dims(img, 2)

    if img.shape[2] == 1:
        img = np.repeat(img, 3, 2)

    if img.shape[2] == 4:
        img = img[:, :, :3]

    if (img.shape[0] < img.shape[1]):
        # Landscape: fix the height, scale the width proportionally.
        height = img.shape[0]
        ratio = height / (size * 1.5)
        width = int(np.ceil(img.shape[1] / ratio))
        img = cv2.resize(img, (width, int(size * 1.5)), interpolation = cv2.INTER_AREA)

        # BUGFIX: the previous `32 - width % 32` added a full 32-pixel band
        # even when width was already divisible by 32; -width % 32 pads 0 then.
        pad_w = -width % 32
        pad = (0, pad_w)

        img = np.pad(img, ((0, 0), (0, pad_w), (0, 0)), 'maximum')
    else:
        # Portrait or square: fix the width, scale the height proportionally.
        width = img.shape[1]
        ratio = width / size
        height = int(np.ceil(img.shape[0] / ratio))
        img = cv2.resize(img, (size, height), interpolation = cv2.INTER_AREA)

        pad_h = -height % 32
        pad = (pad_h, 0)

        img = np.pad(img, ((0, pad_h), (0, 0), (0, 0)), 'maximum')

    # Float images are clipped in place to the valid [0, 1] range.
    if (img.dtype == 'float32'):
        np.clip(img, 0, 1, out = img)

    return img[:, :, :1], pad