Raid41 committed on Oct 24, 2023

Commit

5f240b7

1 Parent(s): ba264a5

Upload 34 files

Browse files

Files changed (35) hide show

.gitattributes +16 -0
.gitignore +5 -0
configs/train_config.json +10 -0
configs/xdog_config.json +8 -0
dataset/datasets.py +107 -0
dataset/manga/train/bw/002-0000-0000.png +3 -0
dataset/manga/train/bw/003-0000-0000.png +0 -0
dataset/manga/train/bw/x2-0000-0000.png +3 -0
dataset/manga/train/bw/x3-0000-0000.png +3 -0
dataset/manga/train/bw/x5-0000-0000.png +3 -0
dataset/manga/train/color/002-0000-0000.png +3 -0
dataset/manga/train/color/003-0000-0000.png +0 -0
dataset/manga/train/color/004-0000-0000.png +0 -0
dataset/manga/train/color/x1-0000-0000.png +3 -0
dataset/manga/train/color/x2-0000-0000.png +3 -0
dataset/manga/train/color/x3-0000-0000.png +3 -0
dataset/manga/train/color/x4-0000-0000.png +3 -0
dataset/manga/train/color/x5-0000-0000.png +3 -0
dataset/manga/train/real_manga/002-0000-0000.png +3 -0
dataset/manga/train/real_manga/003-0000-0000.png +0 -0
dataset/manga/train/real_manga/004-0000-0000.png +0 -0
dataset/manga/train/real_manga/x1-0000-0000.png +3 -0
dataset/manga/train/real_manga/x2-0000-0000.png +3 -0
dataset/manga/train/real_manga/x3-0000-0000.png +3 -0
dataset/manga/train/real_manga/x4-0000-0000.png +3 -0
dataset/manga/train/real_manga/x5-0000-0000.png +3 -0
inference.py +154 -0
model/extractor.pth +3 -0
model/extractor.py +127 -0
model/models.py +422 -0
model/vgg16-397923af.pth +3 -0
train.py +294 -0
utils/dataset_utils.py +141 -0
utils/utils.py +102 -0
utils/xdog.py +68 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/bw/002-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/bw/x2-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/bw/x3-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/bw/x5-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/color/002-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/color/x1-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/color/x2-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/color/x3-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/color/x4-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/color/x5-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/real_manga/002-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/real_manga/x1-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/real_manga/x2-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/real_manga/x3-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/real_manga/x4-0000-0000.png filter=lfs diff=lfs merge=lfs -text
+dataset/manga/train/real_manga/x5-0000-0000.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+*.ipynb
+*.pth
+__pycache__/
+temp_colorization/

configs/train_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "generator_lr" : 1e-4,
+    "discriminator_lr" : 4e-4,
+    "epochs" : 15,
+    "lr_decrease_epoch" : 10,
+    "finetuning_generator_lr" : 1e-6,
+    "finetuning_iterations" : 3500,
+    "batch_size" : 4,
+    "number_of_mults" : 3
+}

configs/xdog_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "sigma" : 0.5,
+     "k" : 8,
+     "phi" : 89.25,
+     "gamma" : 0.95,
+     "eps" : -0.1,
+     "mult" : 7
+}

dataset/datasets.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import torch
+import os
+import torchvision.transforms as transforms
+import matplotlib.pyplot as plt
+import numpy as np
+from utils.utils import generate_mask
+class TrainDataset(torch.utils.data.Dataset):
+    """Paired training samples: a color page with its sketch and 'dfm' images.
+
+    File names come from ``<data_path>/color``; the matching sketch and
+    ``*_dfm.png`` files are read from ``<data_path>/bw``.
+    """
+    def __init__(self, data_path, transform = None, mults_amount = 1):
+        """Index the color folder and store the sampling options.
+
+        data_path: root folder holding the ``color`` and ``bw`` sub-folders.
+        transform: optional callable invoked as ``transform(image=..., mask=...)``
+            whose result is indexed with ``'image'`` and ``'mask'``.
+        mults_amount: when greater than 1, a variant index is drawn at random
+            from ``range(mults_amount)`` for every requested item.
+        """
+        self.data = os.listdir(os.path.join(data_path, 'color'))
+        self.data_path, self.transform = data_path, transform
+        self.mults_amount = mults_amount
+        self.ToTensor = transforms.ToTensor()
+    def __len__(self):
+        """Return the number of entries listed in the color folder."""
+        return len(self.data)
+    def __getitem__(self, idx):
+        """Return ``(bw_img, color_img, hint, dfm_img)`` tensors for item ``idx``."""
+        image_name = self.data[idx]
+        color_img = plt.imread(os.path.join(self.data_path, 'color', image_name))
+        if self.mults_amount > 1:
+            # Presumably several sketch variants exist per page; pick one at random.
+            mult_number = np.random.choice(range(self.mults_amount))
+            stem = image_name[:image_name.rfind('.')]
+            variant = '_' + str(mult_number)
+            bw_name = stem + variant + '.png'
+            dfm_name = stem + variant + '_dfm.png'
+        else:
+            bw_name = image_name
+            # NOTE(review): '0_dfm.png' differs from the '_dfm.png' suffix that
+            # FineTuningDataset uses for the same case -- confirm it is intended.
+            dfm_name = os.path.splitext(image_name)[0] + '0_dfm.png'
+        bw_dir = os.path.join(self.data_path, 'bw')
+        planes = [np.expand_dims(plt.imread(os.path.join(bw_dir, name)), 2) for name in (bw_name, dfm_name)]
+        # Stack sketch (index 0) and dfm (index 1) so one transform call keeps
+        # them aligned with each other and with the color image.
+        stacked = np.concatenate(planes, axis = 2)
+        if self.transform:
+            augmented = self.transform(image = color_img, mask = stacked)
+            color_img, stacked = augmented['image'], augmented['mask']
+        color_img = self.ToTensor(color_img)
+        bw_img = self.ToTensor(stacked[:, :, 0])
+        dfm_img = self.ToTensor(stacked[:, :, 1])
+        # Rescale the color tensor with (x - 0.5) / 0.5.
+        color_img = (color_img - 0.5) / 0.5
+        mask = generate_mask(bw_img.shape[1], bw_img.shape[2])
+        # The hint is the masked color image followed by the mask itself.
+        hint = torch.cat((color_img * mask, mask), 0)
+        return bw_img, color_img, hint, dfm_img
+class FineTuningDataset(torch.utils.data.Dataset):
+    """Real-manga pages combined with unpaired color images for fine-tuning.
+
+    Pages and their ``*_dfm.png`` companions are read from
+    ``<data_path>/real_manga``; color images come from ``<data_path>/color``
+    in shuffled order, so page ``idx`` is matched with an arbitrary color image.
+    """
+    def __init__(self, data_path, transform = None, mult_amount = 1):
+        """Index both folders and shuffle the color file list.
+
+        data_path: root folder holding the ``real_manga`` and ``color`` sub-folders.
+        transform: optional callable invoked as ``transform(image=...)`` and
+            ``transform(image=..., mask=...)``; results are indexed with
+            ``'image'`` and ``'mask'``.
+        mult_amount: when greater than 1, a variant index is drawn at random
+            from ``range(mult_amount)`` to select the dfm file.
+        """
+        # Entries whose name contains '_dfm' are companions, not pages.
+        self.data = [x for x in os.listdir(os.path.join(data_path, 'real_manga')) if '_dfm' not in x]
+        self.color_data = os.listdir(os.path.join(data_path, 'color'))
+        self.data_path = data_path
+        self.transform = transform
+        self.mults_amount = mult_amount
+        np.random.shuffle(self.color_data)
+        self.ToTensor = transforms.ToTensor()
+    def __len__(self):
+        """Return the number of manga pages (dfm companions excluded)."""
+        return len(self.data)
+    def __getitem__(self, idx):
+        """Return ``(bw_img, dfm_img, color_img)`` tensors for page ``idx``."""
+        # The pairing is arbitrary, so wrap around instead of raising
+        # IndexError when there are fewer color images than manga pages.
+        color_name = self.color_data[idx % len(self.color_data)]
+        color_img = plt.imread(os.path.join(self.data_path, 'color', color_name))
+        image_name = self.data[idx]
+        stem = os.path.splitext(image_name)[0]
+        if self.mults_amount > 1:
+            # The variant index must come from range(mults_amount); building the
+            # name from mults_amount itself would point outside that range.
+            mult_number = np.random.choice(range(self.mults_amount))
+            dfm_name = stem + '_' + str(mult_number) + '_dfm.png'
+        else:
+            dfm_name = stem + '_dfm.png'
+        manga_dir = os.path.join(self.data_path, 'real_manga')
+        # NOTE(review): the page is always read from image_name, never from a
+        # per-variant file -- confirm that is the intended layout.
+        bw_img = np.expand_dims(plt.imread(os.path.join(manga_dir, image_name)), 2)
+        dfm_img = np.expand_dims(plt.imread(os.path.join(manga_dir, dfm_name)), 2)
+        if self.transform:
+            # Color and manga images are unpaired, so they are transformed in
+            # separate calls; page and dfm share one call to stay aligned.
+            color_img = self.transform(image = color_img)['image']
+            result = self.transform(image = bw_img, mask = dfm_img)
+            bw_img = result['image']
+            dfm_img = result['mask']
+        color_img = self.ToTensor(color_img)
+        bw_img = self.ToTensor(bw_img)
+        dfm_img = self.ToTensor(dfm_img)
+        # Rescale the color tensor with (x - 0.5) / 0.5.
+        color_img = (color_img - 0.5) / 0.5
+        return bw_img, dfm_img, color_img

dataset/manga/train/bw/002-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: ab9ffbbdc32766ce64f758d0402194d5f78b444d6614e4a5088256ace4dad327
Pointer size: 132 Bytes
Size of remote file: 3.87 MB

dataset/manga/train/bw/003-0000-0000.png ADDED Viewed

dataset/manga/train/bw/x2-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: b605bc2accdddb7659463cbdf891a4ac5ac12c12dddd406de001d1737cfc2818
Pointer size: 132 Bytes
Size of remote file: 6.7 MB

dataset/manga/train/bw/x3-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: ebfd588a0e54d0380b3dfa522a3b8b8d16a40180ea66ba8d3da6b0abdac2f195
Pointer size: 132 Bytes
Size of remote file: 3.98 MB

dataset/manga/train/bw/x5-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: b1c940b720dc52aa4f8f5372fb8071b639f5ae8b75470983b5301aabd3d9bd2b
Pointer size: 132 Bytes
Size of remote file: 4.25 MB

dataset/manga/train/color/002-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: 1a289ac6f4c1b477e987eef553232e70cefd536b62cf7c0662e27d84c55b3f02
Pointer size: 132 Bytes
Size of remote file: 6.26 MB

dataset/manga/train/color/003-0000-0000.png ADDED Viewed

dataset/manga/train/color/004-0000-0000.png ADDED Viewed

dataset/manga/train/color/x1-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: 4fee65ac5d942ccbcf46250042077897a1a1d0288dbcbd4d4e01a95667b4321f
Pointer size: 132 Bytes
Size of remote file: 5.74 MB

dataset/manga/train/color/x2-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: 3934c58bf1876be001bd806f637d7722dbd6ba34ba9cc2aed98288019771a248
Pointer size: 133 Bytes
Size of remote file: 11.3 MB

dataset/manga/train/color/x3-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: e8690c5617dbb1215cfdc0f70a2e31156794b0abf21dde7f9c50bbe2675de71e
Pointer size: 132 Bytes
Size of remote file: 5.25 MB

dataset/manga/train/color/x4-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: c5d0f633082871aa456f2d7864d0d9f4fa1ca9ae2b1cc229df4927b38a4df40c
Pointer size: 132 Bytes
Size of remote file: 4.98 MB

dataset/manga/train/color/x5-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: acba9510555cebb18fbdb220ef3f22724617893f35d8a3fe1c509ed31f257dbb
Pointer size: 132 Bytes
Size of remote file: 5.63 MB

dataset/manga/train/real_manga/002-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: 1a289ac6f4c1b477e987eef553232e70cefd536b62cf7c0662e27d84c55b3f02
Pointer size: 132 Bytes
Size of remote file: 6.26 MB

dataset/manga/train/real_manga/003-0000-0000.png ADDED Viewed

dataset/manga/train/real_manga/004-0000-0000.png ADDED Viewed

dataset/manga/train/real_manga/x1-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: 4fee65ac5d942ccbcf46250042077897a1a1d0288dbcbd4d4e01a95667b4321f
Pointer size: 132 Bytes
Size of remote file: 5.74 MB

dataset/manga/train/real_manga/x2-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: 3934c58bf1876be001bd806f637d7722dbd6ba34ba9cc2aed98288019771a248
Pointer size: 133 Bytes
Size of remote file: 11.3 MB

dataset/manga/train/real_manga/x3-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: e8690c5617dbb1215cfdc0f70a2e31156794b0abf21dde7f9c50bbe2675de71e
Pointer size: 132 Bytes
Size of remote file: 5.25 MB

dataset/manga/train/real_manga/x4-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: c5d0f633082871aa456f2d7864d0d9f4fa1ca9ae2b1cc229df4927b38a4df40c
Pointer size: 132 Bytes
Size of remote file: 4.98 MB

dataset/manga/train/real_manga/x5-0000-0000.png ADDED Viewed

Git LFS Details

SHA256: acba9510555cebb18fbdb220ef3f22724617893f35d8a3fe1c509ed31f257dbb
Pointer size: 132 Bytes
Size of remote file: 5.63 MB

inference.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import torch
+import torch.nn as nn
+import numpy as np
+from utils.dataset_utils import get_sketch
+from utils.utils import resize_pad, generate_mask, extract_cbr, create_cbz, sorted_alphanumeric, subfolder_image_search, remove_folder
+from torchvision.transforms import ToTensor
+import os
+import matplotlib.pyplot as plt
+import argparse
+from model.models import Colorizer, Generator
+from model.extractor import get_seresnext_extractor
+from utils.xdog import XDoGSketcher
+from utils.utils import open_json
+import sys
+def colorize_without_hint(inp, colorizer, device = 'cpu', auto_hint = False, auto_hint_sigma = 0.003):
+    """Run the colorizer with an all-zero hint, optionally followed by a self-hinted pass.
+
+    inp: 4-D input tensor; a 4-channel hint is concatenated to it along dim 1.
+    colorizer: callable returning a 2-tuple whose first element is the color output.
+    device: device on which the hint tensors are created / moved.
+    auto_hint: when true, sample a hint from the first result and run a second pass.
+    auto_hint_sigma: forwarded to ``generate_mask`` as ``sigma`` for that sampling.
+
+    Returns the color output of the last pass.
+    """
+    height, width = inp.shape[2], inp.shape[3]
+    empty_hint = torch.zeros(1, 4, height, width).float().to(device)
+    with torch.no_grad():
+        fake_color, _ = colorizer(torch.cat([inp, empty_hint], 1))
+    if not auto_hint:
+        return fake_color
+    # Second pass: hint = first result masked at sampled positions, plus the mask.
+    mask = generate_mask(fake_color.shape[2], fake_color.shape[3], full = False, prob = 1, sigma = auto_hint_sigma).unsqueeze(0)
+    mask = mask.to(device)
+    sampled_hint = torch.cat([fake_color * mask, mask], 1)
+    with torch.no_grad():
+        fake_color, _ = colorizer(torch.cat([inp, sampled_hint], 1))
+    return fake_color
+def process_image(image, sketcher, colorizer, auto_hint, auto_hint_sigma = 0.003, dfm = True, device = 'cpu', to_tensor = ToTensor()):
+    """Colorize one image array and return the result with the padding cropped off.
+
+    image: input image array, passed to ``resize_pad``.
+    sketcher: sketch extractor handed to ``get_sketch``.
+    colorizer: model handed to ``colorize_without_hint``.
+    auto_hint / auto_hint_sigma: forwarded to ``colorize_without_hint``.
+    dfm: flag forwarded to ``get_sketch``.
+    device: device the input tensors are moved to.
+    to_tensor: converter applied to both arrays returned by ``get_sketch``.
+
+    Returns a channels-last NumPy array rescaled with ``x * 0.5 + 0.5``.
+    """
+    image, pad = resize_pad(image)
+    # Distinct local names so the ``dfm`` flag is not shadowed by the dfm image.
+    bw, dfm_map = get_sketch(image, sketcher, dfm)
+    bw = to_tensor(bw).unsqueeze(0).to(device)
+    dfm_map = to_tensor(dfm_map).unsqueeze(0).to(device)
+    # Forward auto_hint_sigma; otherwise the caller's value would be ignored
+    # and the callee's default would always be used.
+    output = colorize_without_hint(torch.cat([bw, dfm_map], 1), colorizer, device = device, auto_hint = auto_hint, auto_hint_sigma = auto_hint_sigma)
+    result = output[0].cpu().permute(1, 2, 0).numpy() * 0.5 + 0.5
+    # Drop pad[0] trailing rows and pad[1] trailing columns; a zero pad must be
+    # skipped because ``[:-0]`` would produce an empty slice.
+    if pad[0] != 0:
+        result = result[:-pad[0]]
+    if pad[1] != 0:
+        result = result[:, :-pad[1]]
+    return result
+def colorize_single_image(file_path, save_path, sketcher, colorizer, auto_hint, auto_hint_sigma = 0.003, dfm = True, device = 'cpu'):
+    """Read ``file_path``, colorize it and write the result to ``save_path``.
+
+    Failures are reported on stdout, including the reason, and do not
+    propagate, so a batch run continues with the next file. Ctrl-C exits the
+    process with status 0.
+    """
+    try:
+        image = plt.imread(file_path)
+        colorization = process_image(image, sketcher, colorizer, auto_hint, auto_hint_sigma, dfm, device)
+        plt.imsave(save_path, colorization)
+    except KeyboardInterrupt:
+        sys.exit(0)
+    except Exception as error:
+        # Best-effort by design, but keep the reason visible instead of hiding it.
+        print('Failed to colorize {}: {}'.format(file_path, error))
+def colorize_images(source_path, target_path, sketcher, colorizer, auto_hint, auto_hint_sigma = 0.003, dfm = True, device = 'cpu'):
+    """Colorize every regular file in ``source_path`` into ``target_path``.
+
+    Each output keeps the input's file name. Remaining arguments are passed
+    through to ``colorize_single_image``.
+    """
+    for image_name in os.listdir(source_path):
+        file_path = os.path.join(source_path, image_name)
+        # Skip sub-folders: the script entry point creates its 'colorization'
+        # output folder inside source_path, and a folder cannot be read as an image.
+        if not os.path.isfile(file_path):
+            continue
+        save_path = os.path.join(target_path, image_name)
+        colorize_single_image(file_path, save_path, sketcher, colorizer, auto_hint, auto_hint_sigma, dfm, device)
+def colorize_cbr(file_path, sketcher, colorizer, auto_hint, auto_hint_sigma = 0.003, dfm = True, device = 'cpu'):
+    """Colorize the pages of a comic archive and write ``<name>_colorized.cbz`` beside it.
+
+    The archive is extracted into the ``temp_colorization`` working folder,
+    each page found there is colorized in place, the pages are packed with
+    ``create_cbz`` and the working folder is removed afterwards.
+    """
+    file_name = os.path.splitext(os.path.basename(file_path))[0]
+    temp_path = 'temp_colorization'
+    os.makedirs(temp_path, exist_ok = True)
+    try:
+        extract_cbr(file_path, temp_path)
+        images = subfolder_image_search(temp_path)
+        for image_path in images:
+            # Source and destination are the same file: pages are overwritten
+            # in place. A page that fails is reported by the helper and left
+            # untouched, so it still ends up in the output archive.
+            colorize_single_image(image_path, image_path, sketcher, colorizer, auto_hint, auto_hint_sigma, dfm, device)
+        result_name = os.path.join(os.path.dirname(file_path), file_name + '_colorized.cbz')
+        create_cbz(result_name, images)
+    finally:
+        # Always clean up, even when extraction or packing raises.
+        remove_folder(temp_path)
+def parse_args():
+    """Parse the command-line options of the colorization script.
+
+    Returns the argparse namespace with ``path``, ``generator``, ``extractor``,
+    ``sigma``, ``gpu`` and ``autohint`` attributes.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-p", "--path", required=True)
+    # Checkpoint locations default to files under the model folder.
+    for flags, default_path in ((("-gen", "--generator"), 'model/biggan.pth'), (("-ext", "--extractor"), 'model/extractor.pth')):
+        parser.add_argument(*flags, default = default_path)
+    parser.add_argument("-s", "--sigma", type = float, default = 0.003)
+    # Boolean switches stay off unless given on the command line.
+    for flags, dest in ((('-g', '--gpu'), 'gpu'), (('-ah', '--auto'), 'autohint')):
+        parser.add_argument(*flags, dest = dest, action = 'store_true', default = False)
+    return parser.parse_args()
+if __name__ == "__main__":
+    # Script entry point: build the colorizer, configure the sketcher, then
+    # dispatch on whether --path is a folder, an archive or a single image.
+    args = parse_args()
+    device = 'cuda' if args.gpu else 'cpu'
+    # Load checkpoints onto the CPU so GPU-saved weights also load on
+    # CPU-only machines; the assembled model is moved to `device` below.
+    generator = Generator()
+    generator.load_state_dict(torch.load(args.generator, map_location = 'cpu'))
+    extractor = get_seresnext_extractor()
+    extractor.load_state_dict(torch.load(args.extractor, map_location = 'cpu'))
+    colorizer = Colorizer(generator, extractor)
+    colorizer = colorizer.eval().to(device)
+    sketcher = XDoGSketcher()
+    xdog_config = open_json('configs/xdog_config.json')
+    # Only override parameters the sketcher already defines.
+    for key, value in xdog_config.items():
+        if key in sketcher.params:
+            sketcher.params[key] = value
+    if os.path.isdir(args.path):
+        # Folder input: results go to a 'colorization' sub-folder.
+        colorization_path = os.path.join(args.path, 'colorization')
+        os.makedirs(colorization_path, exist_ok = True)
+        colorize_images(args.path, colorization_path, sketcher, colorizer, args.autohint, args.sigma, device = device)
+    elif os.path.isfile(args.path):
+        split = os.path.splitext(args.path)
+        extension = split[1].lower()
+        if extension in ('.cbr', '.cbz', '.rar', '.zip'):
+            colorize_cbr(args.path, sketcher, colorizer, args.autohint, args.sigma, device = device)
+        elif extension in ('.jpg', '.png'):
+            # Single image: write '<name>_colorized<ext>' next to the input.
+            new_image_path = split[0] + '_colorized' + split[1]
+            colorize_single_image(args.path, new_image_path, sketcher, colorizer, args.autohint, args.sigma, device = device)
+        else:
+            print('Wrong format')
+    else:
+        print('Wrong path')

model/extractor.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee3c59f02ac8c59298fd9b819fa33d2efa168847e15e4be39b35c286f7c18607
+size 6340842

model/extractor.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import torch
+import torch.nn as nn
+import math
+'''https://github.com/blandocs/Tag2Pix/blob/master/model/pretrained.py'''
+# Pretrained version
+class Selayer(nn.Module):
+    """Channel gate: global average pool, two 1x1 convs, sigmoid, then rescale the input."""
+    def __init__(self, inplanes):
+        """Create the gate for ``inplanes`` channels with an ``inplanes // 16`` hidden width."""
+        super().__init__()
+        hidden = inplanes // 16
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.conv1 = nn.Conv2d(inplanes, hidden, kernel_size=1, stride=1)
+        self.conv2 = nn.Conv2d(hidden, inplanes, kernel_size=1, stride=1)
+        self.relu = nn.ReLU(inplace=True)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, x):
+        """Return ``x`` multiplied by its per-channel sigmoid gate."""
+        gate = self.global_avgpool(x)
+        gate = self.relu(self.conv1(gate))
+        gate = self.sigmoid(self.conv2(gate))
+        return x * gate
+class BottleneckX_Origin(nn.Module):
+    """Bottleneck block: 1x1 conv, grouped 3x3 conv, 1x1 conv, SE gate, residual add."""
+    # Output channels are ``planes * expansion``.
+    expansion = 4
+    def __init__(self, inplanes, planes, cardinality, stride=1, downsample=None):
+        """Build the block.
+
+        inplanes: input channel count.
+        planes: base width; the inner convs use ``planes * 2`` channels and the
+            block outputs ``planes * 4``.
+        cardinality: number of groups of the 3x3 convolution.
+        stride: stride of the 3x3 convolution.
+        downsample: optional module applied to the input before the residual add.
+        """
+        super().__init__()
+        mid_channels = planes * 2
+        out_channels = planes * 4
+        self.conv1 = nn.Conv2d(inplanes, mid_channels, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(mid_channels)
+        self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride,
+                               padding=1, groups=cardinality, bias=False)
+        self.bn2 = nn.BatchNorm2d(mid_channels)
+        self.conv3 = nn.Conv2d(mid_channels, out_channels, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_channels)
+        self.selayer = Selayer(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+    def forward(self, x):
+        """Apply the bottleneck path and add the (optionally downsampled) input."""
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.relu(self.bn2(self.conv2(out)))
+        out = self.selayer(self.bn3(self.conv3(out)))
+        shortcut = x if self.downsample is None else self.downsample(x)
+        out += shortcut
+        return self.relu(out)
+class SEResNeXt_extractor(nn.Module):
+    """Convolutional stem followed by two residual stages."""
+    def __init__(self, block, layers, input_channels=3, cardinality=32):
+        """Build the stem and two stages, then initialise conv and batch-norm weights.
+
+        block: block class exposing ``expansion`` and constructed as
+            ``block(inplanes, planes, cardinality[, stride, downsample])``.
+        layers: block counts per stage; only ``layers[0]`` and ``layers[1]`` are read.
+        input_channels: channel count expected by the first convolution.
+        cardinality: group count forwarded to every block.
+        """
+        super().__init__()
+        self.cardinality = cardinality
+        self.inplanes = 64
+        self.input_channels = input_channels
+        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self._init_weights()
+    def _init_weights(self):
+        """Draw conv weights from N(0, sqrt(2 / n)) and reset batch-norm to weight 1, bias 0."""
+        for module in self.modules():
+            if isinstance(module, nn.Conv2d):
+                kernel_h, kernel_w = module.kernel_size
+                n = kernel_h * kernel_w * module.out_channels
+                module.weight.data.normal_(0, math.sqrt(2. / n))
+                if module.bias is not None:
+                    module.bias.data.zero_()
+            elif isinstance(module, nn.BatchNorm2d):
+                module.weight.data.fill_(1)
+                module.bias.data.zero_()
+    def _make_layer(self, block, planes, blocks, stride=1):
+        """Return a stage of ``blocks`` blocks; only the first uses ``stride``."""
+        out_planes = planes * block.expansion
+        downsample = None
+        # A projection is needed whenever the first block changes resolution or width.
+        if stride != 1 or self.inplanes != out_planes:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.inplanes, out_planes,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+        stage = [block(self.inplanes, planes, self.cardinality, stride, downsample)]
+        self.inplanes = out_planes
+        stage.extend(block(self.inplanes, planes, self.cardinality) for _ in range(1, blocks))
+        return nn.Sequential(*stage)
+    def forward(self, x):
+        """Run the stem and both stages and return the resulting feature map."""
+        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
+        return self.layer2(self.layer1(x))
+def get_seresnext_extractor():
+    """Return a single-input-channel SEResNeXt_extractor built from BottleneckX_Origin blocks."""
+    # The extractor constructor only reads the first two entries of this list.
+    stage_depths = [3, 4, 6, 3]
+    return SEResNeXt_extractor(BottleneckX_Origin, stage_depths, input_channels=1)

model/models.py ADDED Viewed

	@@ -0,0 +1,422 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.models as M
+import math
+from torch import Tensor
+from torch.nn import Parameter
+'''https://github.com/orashi/AlacGAN/blob/master/models/standard.py'''
+def l2normalize(v, eps=1e-12):
+    """Return ``v`` divided by its norm; ``eps`` keeps the division finite for a zero tensor."""
+    norm = v.norm()
+    return v / (norm + eps)
+class SpectralNorm(nn.Module):
+    """Wrap a module and rescale one of its parameters before every forward pass.
+
+    The wrapped parameter (``name``) is replaced on the inner module by three
+    parameters: ``<name>_bar`` (the raw weight) and the vectors ``<name>_u`` /
+    ``<name>_v`` used by the power iteration in ``_update_u_v``.
+    """
+    def __init__(self, module, name='weight', power_iterations=1):
+        """Store the wrapped module and create the helper parameters if missing.
+
+        module: module whose parameter ``name`` is normalized.
+        name: attribute name of the parameter on ``module``.
+        power_iterations: number of power-iteration steps per forward call.
+        """
+        super(SpectralNorm, self).__init__()
+        self.module = module
+        self.name = name
+        self.power_iterations = power_iterations
+        if not self._made_params():
+            self._make_params()
+    def _update_u_v(self):
+        """Refresh ``u``/``v`` by power iteration and set ``module.<name>`` to ``w / sigma``."""
+        u = getattr(self.module, self.name + "_u")
+        v = getattr(self.module, self.name + "_v")
+        w = getattr(self.module, self.name + "_bar")
+        # The weight is treated as a (height, -1) matrix.
+        height = w.data.shape[0]
+        for _ in range(self.power_iterations):
+            # Updates go through .data, so they are not tracked by autograd.
+            v.data = l2normalize(torch.mv(torch.t(w.view(height,-1).data), u.data))
+            u.data = l2normalize(torch.mv(w.view(height,-1).data, v.data))
+        # sigma = torch.dot(u.data, torch.mv(w.view(height,-1).data, v.data))
+        # sigma = u^T (W v), computed on the parameters themselves (not .data).
+        sigma = u.dot(w.view(height, -1).mv(v))
+        # The original parameter entry was removed in _make_params, so this
+        # assigns the rescaled weight as an ordinary attribute of the module.
+        setattr(self.module, self.name, w / sigma.expand_as(w))
+    def _made_params(self):
+        """Return True when the ``_u``, ``_v`` and ``_bar`` attributes already exist."""
+        try:
+            u = getattr(self.module, self.name + "_u")
+            v = getattr(self.module, self.name + "_v")
+            w = getattr(self.module, self.name + "_bar")
+            return True
+        except AttributeError:
+            return False
+    def _make_params(self):
+        """Replace ``module.<name>`` with the ``_u``, ``_v`` and ``_bar`` parameters."""
+        w = getattr(self.module, self.name)
+        height = w.data.shape[0]
+        width = w.view(height, -1).data.shape[1]
+        # u and v start as random normal vectors, are excluded from gradient
+        # updates, and are normalized with l2normalize.
+        u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
+        v = Parameter(w.data.new(width).normal_(0, 1), requires_grad=False)
+        u.data = l2normalize(u.data)
+        v.data = l2normalize(v.data)
+        w_bar = Parameter(w.data)
+        # Remove the original parameter entry; _update_u_v assigns the
+        # attribute of the same name on every forward call.
+        del self.module._parameters[self.name]
+        self.module.register_parameter(self.name + "_u", u)
+        self.module.register_parameter(self.name + "_v", v)
+        self.module.register_parameter(self.name + "_bar", w_bar)
+    def forward(self, *args):
+        """Update the normalized weight, then call the wrapped module's forward."""
+        self._update_u_v()
+        return self.module.forward(*args)
+class Selayer(nn.Module):
+    """Channel gate: global average pool, two 1x1 convs, sigmoid, then rescale the input."""
+    def __init__(self, inplanes):
+        """Create the gate for ``inplanes`` channels with an ``inplanes // 16`` hidden width."""
+        super().__init__()
+        squeezed = inplanes // 16
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.conv1 = nn.Conv2d(inplanes, squeezed, kernel_size=1, stride=1)
+        self.conv2 = nn.Conv2d(squeezed, inplanes, kernel_size=1, stride=1)
+        self.relu = nn.ReLU(inplace=True)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, x):
+        """Return ``x`` multiplied by its per-channel sigmoid gate."""
+        weights = self.conv1(self.global_avgpool(x))
+        weights = self.conv2(self.relu(weights))
+        return x * self.sigmoid(weights)
+class SelayerSpectr(nn.Module):
+    """Same channel gate as Selayer, with both 1x1 convs wrapped in SpectralNorm."""
+    def __init__(self, inplanes):
+        """Create the gate for ``inplanes`` channels with an ``inplanes // 16`` hidden width."""
+        super().__init__()
+        squeezed = inplanes // 16
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.conv1 = SpectralNorm(nn.Conv2d(inplanes, squeezed, kernel_size=1, stride=1))
+        self.conv2 = SpectralNorm(nn.Conv2d(squeezed, inplanes, kernel_size=1, stride=1))
+        self.relu = nn.ReLU(inplace=True)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, x):
+        """Return ``x`` multiplied by its per-channel sigmoid gate."""
+        weights = self.conv1(self.global_avgpool(x))
+        weights = self.conv2(self.relu(weights))
+        return x * self.sigmoid(weights)
+class ResNeXtBottleneck(nn.Module):
+    """Residual block: 1x1 reduce, grouped dilated conv, 1x1 expand, SE gate, shortcut add."""
+    def __init__(self, in_channels=256, out_channels=256, stride=1, cardinality=32, dilate=1):
+        """Build the block.
+
+        in_channels / out_channels: channel counts; the inner width is ``out_channels // 2``.
+        stride: stride of the grouped conv; its kernel size is ``2 + stride``.
+        cardinality: number of groups of the grouped conv.
+        dilate: dilation of the grouped conv, also used as its padding.
+        """
+        super().__init__()
+        width = out_channels // 2
+        self.out_channels = out_channels
+        self.conv_reduce = nn.Conv2d(in_channels, width, kernel_size=1, stride=1, padding=0, bias=False)
+        self.conv_conv = nn.Conv2d(width, width, kernel_size=2 + stride, stride=stride,
+                                   padding=dilate, dilation=dilate, groups=cardinality, bias=False)
+        self.conv_expand = nn.Conv2d(width, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
+        # The shortcut is the identity unless the block is strided, in which
+        # case it average-pools by 2.
+        self.shortcut = nn.Sequential()
+        if stride != 1:
+            self.shortcut.add_module('shortcut', nn.AvgPool2d(2, stride=2))
+        self.selayer = Selayer(out_channels)
+    def forward(self, x):
+        """Return the shortcut of ``x`` plus the gated bottleneck branch."""
+        branch = F.leaky_relu(self.conv_reduce.forward(x), 0.2, True)
+        branch = F.leaky_relu(self.conv_conv.forward(branch), 0.2, True)
+        branch = self.selayer(self.conv_expand.forward(branch))
+        return self.shortcut.forward(x) + branch
+class SpectrResNeXtBottleneck(nn.Module):
+    """ResNeXtBottleneck variant whose convolutions and SE gate use SpectralNorm."""
+    def __init__(self, in_channels=256, out_channels=256, stride=1, cardinality=32, dilate=1):
+        """Build the block.
+
+        in_channels / out_channels: channel counts; the inner width is ``out_channels // 2``.
+        stride: stride of the grouped conv; its kernel size is ``2 + stride``.
+        cardinality: number of groups of the grouped conv.
+        dilate: dilation of the grouped conv, also used as its padding.
+        """
+        super().__init__()
+        width = out_channels // 2
+        self.out_channels = out_channels
+        self.conv_reduce = SpectralNorm(nn.Conv2d(in_channels, width, kernel_size=1, stride=1, padding=0, bias=False))
+        self.conv_conv = SpectralNorm(nn.Conv2d(width, width, kernel_size=2 + stride, stride=stride,
+                                                padding=dilate, dilation=dilate, groups=cardinality, bias=False))
+        self.conv_expand = SpectralNorm(nn.Conv2d(width, out_channels, kernel_size=1, stride=1, padding=0, bias=False))
+        # The shortcut is the identity unless the block is strided, in which
+        # case it average-pools by 2.
+        self.shortcut = nn.Sequential()
+        if stride != 1:
+            self.shortcut.add_module('shortcut', nn.AvgPool2d(2, stride=2))
+        self.selayer = SelayerSpectr(out_channels)
+    def forward(self, x):
+        """Return the shortcut of ``x`` plus the gated bottleneck branch."""
+        branch = F.leaky_relu(self.conv_reduce.forward(x), 0.2, True)
+        branch = F.leaky_relu(self.conv_conv.forward(branch), 0.2, True)
+        branch = self.selayer(self.conv_expand.forward(branch))
+        return self.shortcut.forward(x) + branch
+class FeatureConv(nn.Module):
+    """Three bias-free 3x3 conv + ReLU stages; the middle conv uses stride 2."""
+    def __init__(self, input_dim=512, output_dim=512):
+        """Build the stack mapping ``input_dim`` channels to ``output_dim`` channels."""
+        super().__init__()
+        # Batch norm is disabled; when enabled it would follow the first two convs only.
+        use_bn = False
+        stages = ((input_dim, 1), (output_dim, 2), (output_dim, 1))
+        layers = []
+        for position, (in_dim, stride) in enumerate(stages):
+            layers.append(nn.Conv2d(in_dim, output_dim, kernel_size=3, stride=stride, padding=1, bias=False))
+            if use_bn and position < 2:
+                layers.append(nn.BatchNorm2d(output_dim))
+            layers.append(nn.ReLU(inplace=True))
+        self.network = nn.Sequential(*layers)
+    def forward(self, x):
+        """Apply the convolutional stack to ``x``."""
+        return self.network(x)
+class Generator(nn.Module):
+    def __init__(self, ngf=64):
+        """Create the encoder stages, the auxiliary decoder and the tunnel stages.
+
+        ngf: base channel width of the tunnel stages (they use ngf * 8 down to ngf).
+        """
+        super(Generator, self).__init__()
+        self.feature_conv = FeatureConv()
+        # Encoder: to0 takes a 6-channel input and keeps resolution; each
+        # following stage starts with a stride-2 conv and doubles the channels.
+        self.to0 =  self._make_encoder_block_first(6, 32)
+        self.to1 = self._make_encoder_block(32, 64)
+        self.to2 = self._make_encoder_block(64, 128)
+        self.to3 = self._make_encoder_block(128, 256)
+        self.to4 = self._make_encoder_block(256, 512)
+        # Auxiliary decoder: 256 channels in, three stride-2 transposed convs,
+        # then a 3-channel Tanh output.
+        self.deconv_for_decoder = nn.Sequential(
+            nn.ConvTranspose2d(256, 128, 3, stride=2, padding=1, output_padding=1), # output is 64 * 64
+            nn.LeakyReLU(0.2),
+            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1), # output is 128 * 128
+            nn.LeakyReLU(0.2),
+            nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1), # output is 256 * 256
+            nn.LeakyReLU(0.2),
+            nn.ConvTranspose2d(32, 3, 3, stride=1, padding=1, output_padding=0), # output is 256 * 256
+            nn.Tanh(),
+        )
+        # Deepest tunnel: 20 residual blocks at ngf * 8 channels.
+        tunnel4 = nn.Sequential(*[ResNeXtBottleneck(ngf * 8, ngf * 8, cardinality=32, dilate=1) for _ in range(20)])
+        # Input is ngf * 8 + 512 channels -- presumably the deepest encoder
+        # output concatenated with the sketch features (TODO confirm in forward).
+        # PixelShuffle(2) doubles the resolution and divides the channels by 4.
+        self.tunnel4 = nn.Sequential(nn.Conv2d(ngf * 8 + 512, ngf * 8, kernel_size=3, stride=1, padding=1),
+                                     nn.LeakyReLU(0.2, True),
+                                     tunnel4,
+                                     nn.Conv2d(ngf * 8, ngf * 4 * 4, kernel_size=3, stride=1, padding=1),
+                                     nn.PixelShuffle(2),
+                                     nn.LeakyReLU(0.2, True)
+                                     )  # 64
+        # Number of blocks per dilation rate in tunnel3 and tunnel2.
+        depth = 2
+        # Dilation ramps up 1 -> 2 -> 4 and back down 2 -> 1.
+        tunnel = [ResNeXtBottleneck(ngf * 4, ngf * 4, cardinality=32, dilate=1) for _ in range(depth)]
+        tunnel += [ResNeXtBottleneck(ngf * 4, ngf * 4, cardinality=32, dilate=2) for _ in range(depth)]
+        tunnel += [ResNeXtBottleneck(ngf * 4, ngf * 4, cardinality=32, dilate=4) for _ in range(depth)]
+        tunnel += [ResNeXtBottleneck(ngf * 4, ngf * 4, cardinality=32, dilate=2),
+                   ResNeXtBottleneck(ngf * 4, ngf * 4, cardinality=32, dilate=1)]
+        tunnel3 = nn.Sequential(*tunnel)
+        self.tunnel3 = nn.Sequential(nn.Conv2d(ngf * 8, ngf * 4, kernel_size=3, stride=1, padding=1),
+                                     nn.LeakyReLU(0.2, True),
+                                     tunnel3,
+                                     nn.Conv2d(ngf * 4, ngf * 2 * 4, kernel_size=3, stride=1, padding=1),
+                                     nn.PixelShuffle(2),
+                                     nn.LeakyReLU(0.2, True)
+                                     )  # 128
+        tunnel = [ResNeXtBottleneck(ngf * 2, ngf * 2, cardinality=32, dilate=1) for _ in range(depth)]
+        tunnel += [ResNeXtBottleneck(ngf * 2, ngf * 2, cardinality=32, dilate=2) for _ in range(depth)]
+        tunnel += [ResNeXtBottleneck(ngf * 2, ngf * 2, cardinality=32, dilate=4) for _ in range(depth)]
+        tunnel += [ResNeXtBottleneck(ngf * 2, ngf * 2, cardinality=32, dilate=2),
+                   ResNeXtBottleneck(ngf * 2, ngf * 2, cardinality=32, dilate=1)]
+        tunnel2 = nn.Sequential(*tunnel)
+        self.tunnel2 = nn.Sequential(nn.Conv2d(ngf * 4, ngf * 2, kernel_size=3, stride=1, padding=1),
+                                     nn.LeakyReLU(0.2, True),
+                                     tunnel2,
+                                     nn.Conv2d(ngf * 2, ngf * 4, kernel_size=3, stride=1, padding=1),
+                                     nn.PixelShuffle(2),
+                                     nn.LeakyReLU(0.2, True)
+                                     )
+        # Shallowest tunnel: one block per dilation rate, cardinality 16.
+        tunnel = [ResNeXtBottleneck(ngf, ngf, cardinality=16, dilate=1)]
+        tunnel += [ResNeXtBottleneck(ngf, ngf, cardinality=16, dilate=2)]
+        tunnel += [ResNeXtBottleneck(ngf, ngf, cardinality=16, dilate=4)]
+        tunnel += [ResNeXtBottleneck(ngf, ngf, cardinality=16, dilate=2),
+                   ResNeXtBottleneck(ngf, ngf, cardinality=16, dilate=1)]
+        tunnel1 = nn.Sequential(*tunnel)
+        self.tunnel1 = nn.Sequential(nn.Conv2d(ngf * 2, ngf, kernel_size=3, stride=1, padding=1),
+                                     nn.LeakyReLU(0.2, True),
+                                     tunnel1,
+                                     nn.Conv2d(ngf, ngf * 2, kernel_size=3, stride=1, padding=1),
+                                     nn.PixelShuffle(2),
+                                     nn.LeakyReLU(0.2, True)
+                                     )
+        # Final projection from ngf channels to a 3-channel output.
+        self.exit = nn.Conv2d(ngf, 3, kernel_size=3, stride=1, padding=1)
+    def _make_encoder_block(self, inplanes, planes):
+        return nn.Sequential(
+            nn.Conv2d(inplanes, planes, 3, 2, 1),
+            nn.LeakyReLU(0.2),
+            nn.Conv2d(planes, planes, 3, 1, 1),
+            nn.LeakyReLU(0.2),
+        )
+    def _make_encoder_block_first(self, inplanes, planes):
+        return nn.Sequential(
+            nn.Conv2d(inplanes, planes, 3, 1, 1),
+            nn.LeakyReLU(0.2),
+            nn.Conv2d(planes, planes, 3, 1, 1),
+            nn.LeakyReLU(0.2),
+        )
+    def forward(self, sketch, sketch_feat):
+        x0 = self.to0(sketch)
+        x1 = self.to1(x0)
+        x2 = self.to2(x1)
+        x3 = self.to3(x2)  # !
+        x4 = self.to4(x3)
+        sketch_feat = self.feature_conv(sketch_feat)
+        out = self.tunnel4(torch.cat([x4, sketch_feat], 1))
+        x = self.tunnel3(torch.cat([out, x3], 1))
+        x = self.tunnel2(torch.cat([x, x2], 1))
+        x = self.tunnel1(torch.cat([x, x1], 1))
+        x = torch.tanh(self.exit(torch.cat([x, x0], 1)))
+        decoder_output = self.deconv_for_decoder(out)
+        return x, decoder_output
+'''
+class Colorizer(nn.Module):
+    def __init__(self, extractor_path = 'model/model.pth'):
+        super(Colorizer, self).__init__()
+        self.generator = Generator()
+        self.extractor = se_resnext_half(dump_path=extractor_path, num_classes=370, input_channels=1)
+    def extractor_eval(self):
+        for param in self.extractor.parameters():
+            param.requires_grad = False
+    def extractor_train(self):
+        for param in extractor.parameters():
+            param.requires_grad = True
+    def forward(self, x, extractor_grad = False):
+        if extractor_grad:
+            features = self.extractor(x[:, 0:1])
+        else:
+            with torch.no_grad():
+                features = self.extractor(x[:, 0:1]).detach()
+        fake, guide = self.generator(x, features)
+        return fake, guide
+'''
+class Colorizer(nn.Module):
+    def __init__(self, generator_model, extractor_model):
+        super(Colorizer, self).__init__()
+        self.generator = generator_model
+        self.extractor = extractor_model
+    def load_generator_weights(self, gen_weights):
+        self.generator.load_state_dict(gen_weights)
+    def load_extractor_weights(self, ext_weights):
+        self.extractor.load_state_dict(ext_weights)
+    def extractor_eval(self):
+        for param in self.extractor.parameters():
+            param.requires_grad = False
+        self.extractor.eval()
+    def extractor_train(self):
+        for param in extractor.parameters():
+            param.requires_grad = True
+        self.extractor.train()
+    def forward(self, x, extractor_grad = False):
+        if extractor_grad:
+            features = self.extractor(x[:, 0:1])
+        else:
+            with torch.no_grad():
+                features = self.extractor(x[:, 0:1]).detach()
+        fake, guide = self.generator(x, features)
+        return fake, guide
+class Discriminator(nn.Module):
+    def __init__(self, ndf=64):
+        super(Discriminator, self).__init__()
+        self.feed = nn.Sequential(SpectralNorm(nn.Conv2d(3, 64, 3, 1, 1)),
+                                nn.LeakyReLU(0.2, True),
+                                SpectralNorm(nn.Conv2d(64, 64, 3, 2, 0)),
+                                nn.LeakyReLU(0.2, True),
+                                  SpectrResNeXtBottleneck(ndf, ndf, cardinality=8, dilate=1),
+                                  SpectrResNeXtBottleneck(ndf, ndf, cardinality=8, dilate=1, stride=2),  # 128
+                                  SpectralNorm(nn.Conv2d(ndf, ndf * 2, kernel_size=1, stride=1, padding=0, bias=False)),
+                                  nn.LeakyReLU(0.2, True),
+                                  SpectrResNeXtBottleneck(ndf * 2, ndf * 2, cardinality=8, dilate=1),
+                                  SpectrResNeXtBottleneck(ndf * 2, ndf * 2, cardinality=8, dilate=1, stride=2),  # 64
+                                  SpectralNorm(nn.Conv2d(ndf * 2, ndf * 4, kernel_size=1, stride=1, padding=0, bias=False)),
+                                  nn.LeakyReLU(0.2, True),
+                                  SpectrResNeXtBottleneck(ndf * 4, ndf * 4, cardinality=8, dilate=1),
+                                  SpectrResNeXtBottleneck(ndf * 4, ndf * 4, cardinality=8, dilate=1, stride=2),  # 32,
+                                  SpectralNorm(nn.Conv2d(ndf * 4, ndf * 8, kernel_size=1, stride=1, padding=1, bias=False)),
+                                  nn.LeakyReLU(0.2, True),
+                                  SpectrResNeXtBottleneck(ndf * 8, ndf * 8, cardinality=8, dilate=1),
+                                  SpectrResNeXtBottleneck(ndf * 8, ndf * 8, cardinality=8, dilate=1, stride=2),  # 16
+                                  SpectrResNeXtBottleneck(ndf * 8, ndf * 8, cardinality=8, dilate=1),
+                                  SpectrResNeXtBottleneck(ndf * 8, ndf * 8, cardinality=8, dilate=1),
+                                  nn.AdaptiveAvgPool2d((1, 1))
+                                  )
+        self.out = nn.Linear(512, 1)
+    def forward(self, color):
+        x = self.feed(color)
+        out = self.out(x.view(color.size(0), -1))
+        return out
+class Content(nn.Module):
+    def __init__(self, path):
+        super(Content, self).__init__()
+        vgg16 = M.vgg16()
+        vgg16.load_state_dict(torch.load(path))
+        vgg16.features = nn.Sequential(
+            *list(vgg16.features.children())[:9]
+        )
+        self.model = vgg16.features
+        self.register_buffer('mean', torch.FloatTensor([0.485 - 0.5, 0.456 - 0.5, 0.406 - 0.5]).view(1, 3, 1, 1))
+        self.register_buffer('std', torch.FloatTensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))
+    def forward(self, images):
+        return self.model((images.mul(0.5) - self.mean) / self.std)

model/vgg16-397923af.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:397923af8e79cdbb6a7127f12361acd7a2f83e06b05044ddf496e83de57a5bf0
+size 553433881

train.py ADDED Viewed

	@@ -0,0 +1,294 @@

+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+import albumentations as albu
+import argparse
+import datetime
+from utils.utils import open_json, weights_init, weights_init_spectr, generate_mask
+from model.models import Colorizer, Generator, Content, Discriminator
+from model.extractor import get_seresnext_extractor
+from dataset.datasets import TrainDataset, FineTuningDataset
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-p", "--path", required=True, help = "dataset path")
+    parser.add_argument('-ft', '--fine_tuning', dest = 'fine_tuning', action = 'store_true')
+    parser.add_argument('-g', '--gpu', dest = 'gpu', action = 'store_true')
+    parser.set_defaults(fine_tuning = False)
+    parser.set_defaults(gpu = False)
+    args = parser.parse_args()
+    return args
+def get_transforms():
+    return albu.Compose([albu.RandomCrop(512, 512, always_apply = True), albu.HorizontalFlip(p = 0.5)], p = 1.)
+def get_dataloaders(data_path, transforms, batch_size, fine_tuning, mult_number):
+    train_dataset = TrainDataset(data_path, transforms, mult_number)
+    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
+    if fine_tuning:
+        finetuning_dataset = FineTuningDataset(data_path, transforms)
+        finetuning_dataloader = torch.utils.data.DataLoader(finetuning_dataset, batch_size = batch_size, shuffle = True)
+    return train_dataloader, finetuning_dataloader
+def get_models(device):
+    generator = Generator()
+    extractor = get_seresnext_extractor()
+    colorizer = Colorizer(generator, extractor)
+    colorizer.extractor_eval()
+    colorizer = colorizer.to(device)
+    discriminator = Discriminator().to(device)
+    content = Content('model/vgg16-397923af.pth').eval().to(device)
+    for param in content.parameters():
+        param.requires_grad = False
+    return colorizer, discriminator, content
+def set_weights(colorizer, discriminator):
+    colorizer.generator.apply(weights_init)
+    colorizer.load_extractor_weights(torch.load('model/extractor.pth'))
+    discriminator.apply(weights_init_spectr)
+def generator_loss(disc_output, true_labels, main_output, guide_output, real_image, content_gen, content_true, dist_loss = nn.L1Loss(), content_dist_loss = nn.MSELoss(), class_loss = nn.BCEWithLogitsLoss()):
+    sim_loss_full = dist_loss(main_output, real_image)
+    sim_loss_guide = dist_loss(guide_output, real_image)
+    adv_loss = class_loss(disc_output, true_labels)
+    content_loss = content_dist_loss(content_gen, content_true)
+    sum_loss = 10 * (sim_loss_full + 0.9 * sim_loss_guide)  + adv_loss + content_loss
+    return sum_loss
+def get_optimizers(colorizer, discriminator, generator_lr, discriminator_lr):
+    optimizerG = optim.Adam(colorizer.generator.parameters(), lr = generator_lr, betas=(0.5, 0.9))
+    optimizerD = optim.Adam(discriminator.parameters(), lr = discriminator_lr, betas=(0.5, 0.9))
+    return optimizerG, optimizerD
+def generator_step(inputs, colorizer, discriminator, content,  loss_function, optimizer, device, white_penalty = True):
+    for p in discriminator.parameters():
+        p.requires_grad = False
+    for p in colorizer.generator.parameters():
+        p.requires_grad = True
+    colorizer.generator.zero_grad()
+    bw, color, hint, dfm = inputs
+    bw, color, hint, dfm = bw.to(device), color.to(device), hint.to(device), dfm.to(device)
+    fake, guide = colorizer(torch.cat([bw, dfm, hint], 1))
+    logits_fake = discriminator(fake)
+    y_real = torch.ones((bw.size(0), 1), device = device)
+    content_fake = content(fake)
+    with torch.no_grad():
+        content_true = content(color)
+    generator_loss = loss_function(logits_fake, y_real, fake, guide, color, content_fake, content_true)
+    if white_penalty:
+        mask = (~((color > 0.85).float().sum(dim = 1) == 3).unsqueeze(1).repeat((1, 3, 1, 1 ))).float()
+        white_zones = mask * (fake + 1) / 2
+        white_penalty = (torch.pow(white_zones.sum(dim = 1), 2).sum(dim = (1, 2)) / (mask.sum(dim = (1, 2, 3)) + 1)).mean()
+        generator_loss += white_penalty
+    generator_loss.backward()
+    optimizer.step()
+    return generator_loss.item()
+def discriminator_step(inputs, colorizer, discriminator, optimizer, device, loss_function = nn.BCEWithLogitsLoss()):
+    for p in discriminator.parameters():
+        p.requires_grad = True
+    for p in colorizer.generator.parameters():
+        p.requires_grad = False
+    discriminator.zero_grad()
+    bw, color, hint, dfm = inputs
+    bw, color, hint, dfm = bw.to(device), color.to(device), hint.to(device), dfm.to(device)
+    y_real = torch.full((bw.size(0), 1), 0.9, device = device)
+    y_fake = torch.zeros((bw.size(0), 1), device = device)
+    with torch.no_grad():
+        fake_color, _ = colorizer(torch.cat([bw, dfm, hint], 1))
+        fake_color.detach()
+    logits_fake = discriminator(fake_color)
+    logits_real = discriminator(color)
+    fake_loss = loss_function(logits_fake, y_fake)
+    real_loss = loss_function(logits_real, y_real)
+    discriminator_loss = real_loss + fake_loss
+    discriminator_loss.backward()
+    optimizer.step()
+    return discriminator_loss.item()
+def decrease_lr(optimizer, rate):
+    for group in optimizer.param_groups:
+        group['lr'] /= rate
+def set_lr(optimizer, value):
+    for group in optimizer.param_groups:
+        group['lr'] = value
+def train(colorizer, discriminator, content, dataloader, epochs, colorizer_optimizer, discriminator_optimizer, lr_decay_epoch = -1, device = 'cpu'):
+    colorizer.generator.train()
+    discriminator.train()
+    disc_step  = True
+    for epoch in range(epochs):
+        if (epoch == lr_decay_epoch):
+            decrease_lr(colorizer_optimizer, 10)
+            decrease_lr(discriminator_optimizer, 10)
+        sum_disc_loss = 0
+        sum_gen_loss = 0
+        for n, inputs in enumerate(dataloader):
+            if n % 5 == 0:
+                print(datetime.datetime.now().time())
+                print('Step : %d Discr loss: %.4f Gen loss : %.4f \n'%(n, sum_disc_loss / (n // 2 + 1), sum_gen_loss / (n // 2 + 1)))
+            if disc_step:
+                step_loss = discriminator_step(inputs, colorizer, discriminator, discriminator_optimizer, device)
+                sum_disc_loss += step_loss
+            else:
+                step_loss = generator_step(inputs, colorizer, discriminator, content, generator_loss, colorizer_optimizer, device)
+                sum_gen_loss += step_loss
+            disc_step = disc_step ^ True
+        print(datetime.datetime.now().time())
+        print('Epoch : %d Discr loss: %.4f Gen loss : %.4f \n'%(epoch, sum_disc_loss / (n // 2 + 1), sum_gen_loss / (n // 2 + 1)))
+def fine_tuning_step(data_iter, colorizer, discriminator, gen_optimizer, disc_optimizer, device, loss_function = nn.BCEWithLogitsLoss()):
+    for p in discriminator.parameters():
+        p.requires_grad = True
+    for p in colorizer.generator.parameters():
+        p.requires_grad = False
+    for cur_disc_step in range(5):
+        discriminator.zero_grad()
+        bw, dfm, color_for_real = data_iter.next()
+        bw, dfm, color_for_real = bw.to(device), dfm.to(device), color_for_real.to(device)
+        y_real = torch.full((bw.size(0), 1), 0.9, device = device)
+        y_fake = torch.zeros((bw.size(0), 1), device = device)
+        empty_hint = torch.zeros(bw.shape[0], 4, bw.shape[2] , bw.shape[3] ).float().to(device)
+        with torch.no_grad():
+            fake_color_manga, _ = colorizer(torch.cat([bw, dfm, empty_hint ], 1))
+            fake_color_manga.detach()
+        logits_fake = discriminator(fake_color_manga)
+        logits_real = discriminator(color_for_real)
+        fake_loss = loss_function(logits_fake, y_fake)
+        real_loss = loss_function(logits_real, y_real)
+        discriminator_loss = real_loss + fake_loss
+        discriminator_loss.backward()
+        disc_optimizer.step()
+    for p in discriminator.parameters():
+        p.requires_grad = False
+    for p in colorizer.generator.parameters():
+        p.requires_grad = True
+    colorizer.generator.zero_grad()
+    bw, dfm, _ = data_iter.next()
+    bw, dfm = bw.to(device), dfm.to(device)
+    y_real = torch.ones((bw.size(0), 1), device = device)
+    empty_hint = torch.zeros(bw.shape[0], 4, bw.shape[2] , bw.shape[3]).float().to(device)
+    fake_manga, _ = colorizer(torch.cat([bw, dfm, empty_hint], 1))
+    logits_fake = discriminator(fake_manga)
+    adv_loss = loss_function(logits_fake, y_real)
+    generator_loss = adv_loss
+    generator_loss.backward()
+    gen_optimizer.step()
+def fine_tuning(colorizer, discriminator, content, dataloader, iterations, colorizer_optimizer, discriminator_optimizer, data_iter, device = 'cpu'):
+    colorizer.generator.train()
+    discriminator.train()
+    disc_step = True
+    for n, inputs in enumerate(dataloader):
+        if n == iterations:
+            return
+        if disc_step:
+            discriminator_step(inputs, colorizer, discriminator, discriminator_optimizer, device)
+        else:
+            generator_step(inputs, colorizer, discriminator, content, generator_loss, colorizer_optimizer, device)
+        disc_step = disc_step ^ True
+        if n % 10 == 5:
+            fine_tuning_step(data_iter, colorizer, discriminator, colorizer_optimizer, discriminator_optimizer, device)
+if __name__ == '__main__':
+    args = parse_args()
+    config = open_json('configs/train_config.json')
+    if args.gpu:
+        device = 'cuda'
+    else:
+        device = 'cpu'
+    augmentations = get_transforms()
+    train_dataloader, ft_dataloader = get_dataloaders(args.path, augmentations, config['batch_size'], args.fine_tuning, config['number_of_mults'])
+    colorizer, discriminator, content = get_models(device)
+    set_weights(colorizer, discriminator)
+    gen_optimizer, disc_optimizer = get_optimizers(colorizer, discriminator, config['generator_lr'], config['discriminator_lr'])
+    train(colorizer, discriminator, content, train_dataloader, config['epochs'], gen_optimizer, disc_optimizer, config['lr_decrease_epoch'], device)
+    if args.fine_tuning:
+        set_lr(gen_optimizer, config["finetuning_generator_lr"])
+        fine_tuning(colorizer, discriminator, content, train_dataloader, config['finetuning_iterations'], gen_optimizer, disc_optimizer, iter(ft_dataloader), device)
+    torch.save(colorizer.generator.state_dict(), str(datetime.datetime.now().time()))

utils/dataset_utils.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import numpy as np
+import matplotlib.pyplot as plt
+import cv2
+import snowy
+import os
+def get_resized_image(img, size):
+    if len(img.shape) == 2:
+        img = np.repeat(np.expand_dims(img, 2), 3, 2)
+    if (img.shape[0] < img.shape[1]):
+        height = img.shape[0]
+        ratio = height / size
+        width = int(np.ceil(img.shape[1] / ratio))
+        img = cv2.resize(img, (width, size), interpolation = cv2.INTER_AREA)
+    else:
+        width = img.shape[1]
+        ratio = width / size
+        height = int(np.ceil(img.shape[0] / ratio))
+        img = cv2.resize(img, (size, height), interpolation = cv2.INTER_AREA)
+    if (img.dtype == 'float32'):
+        np.clip(img, 0, 1, out = img)
+    return img
+def get_sketch_image(img, sketcher, mult_val):
+    if mult_val:
+        sketch_image = sketcher.get_sketch_with_resize(img, mult = mult_val)
+    else:
+        sketch_image = sketcher.get_sketch_with_resize(img)
+    return sketch_image
+def get_dfm_image(sketch):
+    dfm_image = snowy.unitize(snowy.generate_sdf(np.expand_dims(1 - sketch, 2) != 0)).squeeze()
+    return dfm_image
+def get_sketch(image, sketcher, dfm, mult = None):
+    sketch_image = get_sketch_image(image, sketcher, mult)
+    dfm_image = None
+    if dfm:
+        dfm_image = get_dfm_image(sketch_image)
+    sketch_image = (sketch_image * 255).astype('uint8')
+    if dfm:
+        dfm_image = (dfm_image * 255).astype('uint8')
+    return sketch_image, dfm_image
+def get_sketches(image, sketcher, mult_list, dfm):
+    for mult in mult_list:
+        yield get_sketch(image, sketcher, dfm, mult)
+def create_resized_dataset(source_path, target_path, side_size):
+    images = os.listdir(source_path)
+    for image_name in images:
+        new_image_name = image_name[:image_name.rfind('.')] + '.png'
+        new_path = os.path.join(target_path, new_image_name)
+        if not os.path.exists(new_path):
+            try:
+                image = cv2.imread(os.path.join(source_path, image_name))
+                if image is None:
+                    raise Exception()
+                image = get_resized_image(image, side_size)
+                cv2.imwrite(new_path, image)
+            except:
+                print('Failed to process {}'.format(image_name))
+def create_sketches_dataset(source_path, target_path, sketcher, mult_list, dfm = False):
+    images = os.listdir(source_path)
+    for image_name in images:
+        try:
+            image = cv2.imread(os.path.join(source_path, image_name))
+            if image is None:
+                raise Exception()
+            for number, (sketch_image, dfm_image) in enumerate(get_sketches(image, sketcher, mult_list, dfm)):
+                new_sketch_name = image_name[:image_name.rfind('.')] + '_' + str(number) + '.png'
+                cv2.imwrite(os.path.join(target_path, new_sketch_name), sketch_image)
+                if dfm:
+                    dfm_name = image_name[:image_name.rfind('.')] + '_' + str(number) + '_dfm.png'
+                    cv2.imwrite(os.path.join(target_path, dfm_name), dfm_image)
+        except:
+            print('Failed to process {}'.format(image_name))
+def create_dataset(source_path, target_path, sketcher, mult_list, side_size, dfm = False):
+    images = os.listdir(source_path)
+    color_path = os.path.join(target_path, 'color')
+    sketch_path = os.path.join(target_path, 'bw')
+    if not os.path.exists(color_path):
+        os.makedirs(color_path)
+    if not os.path.exists(sketch_path):
+        os.makedirs(sketch_path)
+    for image_name in images:
+        new_image_name = image_name[:image_name.rfind('.')] + '.png'
+        try:
+            image = cv2.imread(os.path.join(source_path, image_name))
+            if image is None:
+                raise Exception()
+            resized_image = get_resized_image(image, side_size)
+            cv2.imwrite(os.path.join(color_path, new_image_name), resized_image)
+            for number, (sketch_image, dfm_image) in enumerate(get_sketches(resized_image, sketcher, mult_list, dfm)):
+                new_sketch_name = image_name[:image_name.rfind('.')] + '_' + str(number) + '.png'
+                cv2.imwrite(os.path.join(sketch_path, new_sketch_name), sketch_image)
+                if dfm:
+                    dfm_name = image_name[:image_name.rfind('.')] + '_' + str(number) + '_dfm.png'
+                    cv2.imwrite(os.path.join(sketch_path, dfm_name), dfm_image)
+        except:
+            print('Failed to process {}'.format(image_name))

utils/utils.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import torch
+import torch.nn as nn
+import numpy as np
+import scipy.stats as stats
+import cv2
+import json
+import patoolib
+import re
+from pathlib import Path
+from shutil import rmtree
+def weights_init(m):
+    classname = m.__class__.__name__
+    if classname.find('Conv2d') != -1:
+        nn.init.xavier_uniform_(m.weight.data)
+def weights_init_spectr(m):
+    classname = m.__class__.__name__
+    if classname.find('Conv2d') != -1:
+        nn.init.xavier_uniform_(m.weight_bar.data)
+def generate_mask(height, width, mu = 1, sigma = 0.0005, prob = 0.5, full = True, full_prob = 0.01):
+    X = stats.truncnorm((0 - mu) / sigma, (1 - mu) / sigma, loc=mu, scale=sigma)
+    if full:
+        if (np.random.binomial(1, p = full_prob) == 1):
+            return torch.ones(1, height, width).float()
+    if np.random.binomial(1, p = prob) == 1:
+        mask = torch.rand(1, height, width).ge(X.rvs(1)[0]).float()
+    else:
+        mask = torch.zeros(1, height, width).float()
+    return mask
+def resize_pad(img, size = 512):
+    if len(img.shape) == 2:
+        img = np.expand_dims(img, 2)
+    if img.shape[2] == 1:
+        img = np.repeat(img, 3, 2)
+    if img.shape[2] == 4:
+        img = img[:, :, :3]
+    pad = None
+    if (img.shape[0] < img.shape[1]):
+        height = img.shape[0]
+        ratio = height / size
+        width = int(np.ceil(img.shape[1] / ratio))
+        img = cv2.resize(img, (width, size), interpolation = cv2.INTER_AREA)
+        new_width = width
+        while (new_width % 32 != 0):
+            new_width += 1
+        pad = (0, new_width - width)
+        img = np.pad(img, ((0, 0), (0, pad[1]), (0, 0)), 'maximum')
+    else:
+        width = img.shape[1]
+        ratio = width / size
+        height = int(np.ceil(img.shape[0] / ratio))
+        img = cv2.resize(img, (size, height), interpolation = cv2.INTER_AREA)
+        new_height = height
+        while (new_height % 32 != 0):
+            new_height += 1
+        pad = (new_height - height, 0)
+        img = np.pad(img, ((0, pad[0]), (0, 0), (0, 0)), 'maximum')
+    if (img.dtype == 'float32'):
+        np.clip(img, 0, 1, out = img)
+    return img, pad
+def open_json(file):
+    with open(file) as json_file:
+        data = json.load(json_file)
+    return data
+def extract_cbr(file, out_dir):
+    patoolib.extract_archive(file,  outdir = out_dir, verbosity = 1)
+def create_cbz(file_path, files):
+    patoolib.create_archive(file_path, files, verbosity = 1)
+def subfolder_image_search(start_folder):
+    return [x.as_posix() for x in Path(".").rglob("*.[pPjJ][nNpP][gG]")]
+def remove_folder(folder_path):
+    rmtree(folder_path)
+def sorted_alphanumeric(data):
+    convert = lambda text: int(text) if text.isdigit() else text.lower()
+    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
+    return sorted(data, key=alphanum_key)

utils/xdog.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from cv2 import resize, INTER_LANCZOS4, INTER_AREA
+from skimage.color import rgb2gray
+import numpy as np
+from scipy.ndimage.filters import gaussian_filter
+from skimage.filters import threshold_otsu
+import matplotlib.pyplot as plt
+class XDoGSketcher:
+    def __init__(self, gamma = 0.95, phi = 89.25, eps = -0.1, k = 8, sigma = 0.5, mult = 1):
+        self.params = {}
+        self.params['gamma'] = gamma
+        self.params['phi'] = phi
+        self.params['eps'] = eps
+        self.params['k'] = k
+        self.params['sigma'] = sigma
+        self.params['mult'] = mult
+    def _xdog(self, im, **transform_params):
+        # Source : https://github.com/CemalUnal/XDoG-Filter
+        # Reference : XDoG: An eXtended difference-of-Gaussians compendium including advanced image stylization
+        # Link : http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.365.151&rep=rep1&type=pdf
+        if im.shape[2] == 3:
+            im = rgb2gray(im)
+        imf1 = gaussian_filter(im, transform_params['sigma'])
+        imf2 = gaussian_filter(im, transform_params['sigma'] * transform_params['k'])
+        imdiff = imf1 - transform_params['gamma'] * imf2
+        imdiff = (imdiff < transform_params['eps']) * 1.0 \
+            + (imdiff >= transform_params['eps']) * (1.0 + np.tanh(transform_params['phi'] * imdiff))
+        imdiff -= imdiff.min()
+        imdiff /= imdiff.max()
+        th = threshold_otsu(imdiff)
+        imdiff = imdiff >= th
+        imdiff = imdiff.astype('float32')
+        return imdiff
+    def get_sketch(self, image, **kwargs):
+        current_params = self.params.copy()
+        for key in kwargs.keys():
+            if key in current_params.keys():
+                current_params[key] = kwargs[key]
+        result_image = self._xdog(image, **current_params)
+        return result_image
+    def get_sketch_with_resize(self, image, **kwargs):
+        if 'mult' in kwargs.keys():
+            mult = kwargs['mult']
+        else:
+            mult = self.params['mult']
+        temp_image = resize(image, (image.shape[1] * mult, image.shape[0] * mult), interpolation = INTER_LANCZOS4)
+        temp_image = self.get_sketch(temp_image, **kwargs)
+        image = resize(temp_image, (image.shape[1], image.shape[0]), interpolation = INTER_AREA)
+        return image