Upload 7 files
- app.py +415 -0
- cp_dataset_test.py +264 -0
- network_generator.py +433 -0
- networks.py +453 -0
- requirements.txt +12 -0
- test_generator.py +278 -0
- utils.py +119 -0
app.py
ADDED
@@ -0,0 +1,415 @@
from multiprocessing import set_start_method
#set_start_method("fork")

import sys
#sys.path.insert(0, "../HR-VITON-main")
from test_generator import *

import re
import inspect
from dataclasses import dataclass, field
from tqdm import tqdm
import pandas as pd
import os
import torch
import torchvision.transforms as transforms
import gradio as gr
import streamlit as st
from io import BytesIO

#### pip install streamlit-image-select
from streamlit_image_select import image_select

demo_image_dir = "demo_images_dir"
assert os.path.exists(demo_image_dir)
demo_images = [
    os.path.join(demo_image_dir, name)
    for name in os.listdir(demo_image_dir)
    if name.endswith(".png") or name.endswith(".jpeg") or name.endswith(".jpg")
]
assert demo_images

# https://github.com/jrieke/streamlit-image-select/issues/10
#
# .image-box {
#     border: 1px solid rgba(49, 51, 63, 0.2);
#     border-radius: 0.25rem;
#     padding: calc(0.25rem + 1px);
#     height: 10rem;
#     min-width: 10rem;
# }

demo_images = [Image.open(path).resize((256, 256)) for path in demo_images]

@dataclass
class OPT:
    #### ConditionGenerator
    out_layer = None
    warp_feature = None
    #### SPADEGenerator
    semantic_nc = None
    fine_height = None
    fine_width = None
    ngf = None
    num_upsampling_layers = None
    norm_G = None
    gen_semantic_nc = None
    #### weight load
    tocg_checkpoint = None
    gen_checkpoint = None
    cuda = False

    data_list = None
    datamode = None
    dataroot = None

    batch_size = None
    shuffle = False
    workers = None

    clothmask_composition = None
    occlusion = False
    datasetting = None

opt = OPT()
opt.out_layer = "relu"
opt.warp_feature = "T1"

input1_nc = 4  # cloth + cloth-mask
nc = 13
input2_nc = nc + 3  # parse_agnostic + densepose
output_nc = nc
tocg = ConditionGenerator(opt,
                          input1_nc=input1_nc,
                          input2_nc=input2_nc, output_nc=output_nc, ngf=96, norm_layer=nn.BatchNorm2d)

#### SPADEResBlock
from network_generator import SPADEResBlock

opt.semantic_nc = 7
opt.fine_height = 1024
opt.fine_width = 768
opt.ngf = 64
opt.num_upsampling_layers = "most"
opt.norm_G = "spectralaliasinstance"
opt.gen_semantic_nc = 7

generator = SPADEGenerator(opt, 3 + 3 + 3)
generator.print_network()

#### https://drive.google.com/open?id=1XJTCdRBOPVgVTmqzhVGFAgMm2NLkw5uQ&authuser=0
opt.tocg_checkpoint = "mtviton.pth"
#### https://drive.google.com/open?id=1T5_YDUhYSSKPC_nZMk2NeC-XXUFoYeNy&authuser=0
opt.gen_checkpoint = "gen.pth"

opt.cuda = False

load_checkpoint(tocg, opt.tocg_checkpoint, opt)
load_checkpoint_G(generator, opt.gen_checkpoint, opt)

#### test scope
tocg.eval()
generator.eval()

opt.data_list = "test_pairs.txt"
opt.datamode = "test"
opt.dataroot = "zalando-hd-resized"

opt.batch_size = 1
opt.shuffle = False
opt.workers = 1
opt.semantic_nc = 13

test_dataset = CPDatasetTest(opt)
test_loader = CPDataLoader(opt, test_dataset)

def construct_images(img_tensors, img_names=[None]):
    # Maps a batch of [-1, 1] tensors to uint8 PIL images; returns the first one.
    for img_tensor, img_name in zip(img_tensors, img_names):
        tensor = (img_tensor.clone() + 1) * 0.5 * 255
        tensor = tensor.cpu().clamp(0, 255)
        try:
            array = tensor.numpy().astype('uint8')
        except:
            array = tensor.detach().numpy().astype('uint8')

        if array.shape[0] == 1:
            array = array.squeeze(0)
        elif array.shape[0] == 3:
            array = array.swapaxes(0, 1).swapaxes(1, 2)

        im = Image.fromarray(array)
        return im

def single_pred_slim_func(opt, inputs, tocg=tocg, generator=generator):
    gauss = tgm.image.GaussianBlur((15, 15), (3, 3))
    if opt.cuda:
        gauss = gauss.cuda()

    # Model
    if opt.cuda:
        tocg.cuda()
    tocg.eval()
    generator.eval()

    num = 0
    iter_start_time = time.time()
    with torch.no_grad():
        for inputs in [inputs]:
            if opt.cuda:
                #pose_map = inputs['pose'].cuda()
                pre_clothes_mask = inputs['cloth_mask'][opt.datasetting].cuda()
                #label = inputs['parse']
                parse_agnostic = inputs['parse_agnostic']
                agnostic = inputs['agnostic'].cuda()
                clothes = inputs['cloth'][opt.datasetting].cuda()  # target cloth
                densepose = inputs['densepose'].cuda()
                #im = inputs['image']
                input_parse_agnostic = parse_agnostic.cuda()
                pre_clothes_mask = torch.FloatTensor((pre_clothes_mask.detach().cpu().numpy() > 0.5).astype(np.float32)).cuda()
            else:
                #pose_map = inputs['pose']
                pre_clothes_mask = inputs['cloth_mask'][opt.datasetting]
                #label = inputs['parse']
                parse_agnostic = inputs['parse_agnostic']
                agnostic = inputs['agnostic']
                clothes = inputs['cloth'][opt.datasetting]  # target cloth
                densepose = inputs['densepose']
                #im = inputs['image']
                input_parse_agnostic = parse_agnostic
                pre_clothes_mask = torch.FloatTensor((pre_clothes_mask.detach().cpu().numpy() > 0.5).astype(np.float32))

            # down
            #pose_map_down = F.interpolate(pose_map, size=(256, 192), mode='bilinear')
            pre_clothes_mask_down = F.interpolate(pre_clothes_mask, size=(256, 192), mode='nearest')
            input_parse_agnostic_down = F.interpolate(input_parse_agnostic, size=(256, 192), mode='nearest')
            clothes_down = F.interpolate(clothes, size=(256, 192), mode='bilinear')
            densepose_down = F.interpolate(densepose, size=(256, 192), mode='bilinear')

            shape = pre_clothes_mask.shape

            # multi-task inputs
            input1 = torch.cat([clothes_down, pre_clothes_mask_down], 1)
            input2 = torch.cat([input_parse_agnostic_down, densepose_down], 1)

            # forward
            flow_list, fake_segmap, warped_cloth_paired, warped_clothmask_paired = tocg(opt, input1, input2)

            # warped cloth mask one hot
            if opt.cuda:
                warped_cm_onehot = torch.FloatTensor((warped_clothmask_paired.detach().cpu().numpy() > 0.5).astype(np.float32)).cuda()
            else:
                warped_cm_onehot = torch.FloatTensor((warped_clothmask_paired.detach().cpu().numpy() > 0.5).astype(np.float32))

            if opt.clothmask_composition != 'no_composition':
                if opt.clothmask_composition == 'detach':
                    cloth_mask = torch.ones_like(fake_segmap)
                    cloth_mask[:, 3:4, :, :] = warped_cm_onehot
                    fake_segmap = fake_segmap * cloth_mask

                if opt.clothmask_composition == 'warp_grad':
                    cloth_mask = torch.ones_like(fake_segmap)
                    cloth_mask[:, 3:4, :, :] = warped_clothmask_paired
                    fake_segmap = fake_segmap * cloth_mask

            # make generator input parse map
            fake_parse_gauss = gauss(F.interpolate(fake_segmap, size=(opt.fine_height, opt.fine_width), mode='bilinear'))
            fake_parse = fake_parse_gauss.argmax(dim=1)[:, None]

            if opt.cuda:
                old_parse = torch.FloatTensor(fake_parse.size(0), 13, opt.fine_height, opt.fine_width).zero_().cuda()
            else:
                old_parse = torch.FloatTensor(fake_parse.size(0), 13, opt.fine_height, opt.fine_width).zero_()
            old_parse.scatter_(1, fake_parse, 1.0)

            labels = {
                0: ['background', [0]],
                1: ['paste', [2, 4, 7, 8, 9, 10, 11]],
                2: ['upper', [3]],
                3: ['hair', [1]],
                4: ['left_arm', [5]],
                5: ['right_arm', [6]],
                6: ['noise', [12]]
            }
            if opt.cuda:
                parse = torch.FloatTensor(fake_parse.size(0), 7, opt.fine_height, opt.fine_width).zero_().cuda()
            else:
                parse = torch.FloatTensor(fake_parse.size(0), 7, opt.fine_height, opt.fine_width).zero_()
            for i in range(len(labels)):
                for label in labels[i][1]:
                    parse[:, i] += old_parse[:, label]

            # warped cloth
            N, _, iH, iW = clothes.shape
            flow = F.interpolate(flow_list[-1].permute(0, 3, 1, 2), size=(iH, iW), mode='bilinear').permute(0, 2, 3, 1)
            flow_norm = torch.cat([flow[:, :, :, 0:1] / ((96 - 1.0) / 2.0), flow[:, :, :, 1:2] / ((128 - 1.0) / 2.0)], 3)

            grid = make_grid(N, iH, iW, opt)
            warped_grid = grid + flow_norm
            warped_cloth = F.grid_sample(clothes, warped_grid, padding_mode='border')
            warped_clothmask = F.grid_sample(pre_clothes_mask, warped_grid, padding_mode='border')
            if opt.occlusion:
                warped_clothmask = remove_overlap(F.softmax(fake_parse_gauss, dim=1), warped_clothmask)
                warped_cloth = warped_cloth * warped_clothmask + torch.ones_like(warped_cloth) * (1 - warped_clothmask)

            output = generator(torch.cat((agnostic, densepose, warped_cloth), dim=1), parse)
            # save output
            return output
            #save_images(output, unpaired_names, output_dir)
            #num += shape[0]
            #print(num)

opt.clothmask_composition = "warp_grad"
opt.occlusion = False
opt.datasetting = "unpaired"

def read_img_and_trans(dataset, opt, img_path):
    if isinstance(img_path, str):
        im = Image.open(img_path)
    else:
        im = img_path
    im = transforms.Resize(opt.fine_width, interpolation=2)(im)
    im = dataset.transform(im)
    return im

import sys
sys.path.insert(0, "fashion-eye-try-on")

import os
from PIL import Image
import gradio as gr
from cloth_segmentation import generate_cloth_mask

def generate_cloth_mask_and_display(cloth_img):
    path = 'fashion-eye-try-on/cloth/cloth.jpg'
    if os.path.exists(path):
        os.remove(path)
    cloth_img.save(path)
    try:
        # os.system('.\cloth_segmentation\generate_cloth_mask.py')
        generate_cloth_mask()
    except Exception as e:
        print(e)
        return
    cloth_mask_img = Image.open("fashion-eye-try-on/cloth_mask/cloth.jpg")
    return cloth_mask_img

def take_human_feature_from_dataset(dataset, idx):
    inputs_upper = list(torch.utils.data.DataLoader(
        [dataset[idx]], batch_size=1))[0]
    return {
        "parse_agnostic": inputs_upper["parse_agnostic"],
        "agnostic": inputs_upper["agnostic"],
        "densepose": inputs_upper["densepose"],
    }

def take_all_feature_with_dataset(cloth_img_path, idx, opt=opt, dataset=test_dataset, only_show_human=False):
    if not isinstance(cloth_img_path, str):
        assert hasattr(cloth_img_path, "save")
        cloth_img_path.save("tmp_cloth.jpg")
        cloth_img_path = "tmp_cloth.jpg"
    assert isinstance(cloth_img_path, str)
    inputs_upper_dict = take_human_feature_from_dataset(dataset, idx)
    if only_show_human:
        return Image.fromarray((inputs_upper_dict["densepose"][0].numpy().transpose((1, 2, 0)) * 255).astype(np.uint8))
    cloth_readed = read_img_and_trans(dataset, opt, cloth_img_path)
    #assert ((cloth_readed - inputs_upper["cloth"][opt.datasetting][0]) ** 2).sum().numpy() < 1e-15
    cloth_input = {
        opt.datasetting: cloth_readed[None, :]
    }
    mask_img = generate_cloth_mask_and_display(Image.open(cloth_img_path))
    cloth_mask_input = {
        opt.datasetting: torch.Tensor(np.asarray(mask_img) / 255)[None, None, :]
    }
    inputs_upper_dict["cloth"] = cloth_input
    inputs_upper_dict["cloth_mask"] = cloth_mask_input
    return inputs_upper_dict

def pred_func(cloth_img, pidx):
    idx = int(pidx)
    im = cloth_img

    #### the actual model input
    inputs_upper_dict = take_all_feature_with_dataset(im, idx, only_show_human=False)

    output_slim = single_pred_slim_func(opt, inputs_upper_dict)
    output_img = construct_images(output_slim)
    return output_img

option = st.selectbox(
    "Choose cloth image or Upload cloth image",
    ("Choose", "Upload")
)
if not isinstance(option, str):
    option = "Choose"

img = None
uploaded_file = None
if option == "Upload":
    # To read file as bytes:
    uploaded_file = st.file_uploader("Upload img")
    if uploaded_file is not None:
        bytes_data = uploaded_file.getvalue()
        img = Image.open(BytesIO(bytes_data))
        cloth_img = img.convert("RGB").resize((256 + 128, 512))
        st.image(cloth_img)
        uploaded_file = st.selectbox(
            "Have you chosen the image?",
            ("Wait", "Have Done")
        )
else:
    img = image_select("Choose img", demo_images)
    #img = Image.open(img)
    cloth_img = img.convert("RGB").resize((256 + 128, 512))
    st.image(cloth_img)
    uploaded_file = st.selectbox(
        "Have you chosen the image?",
        ("Wait", "Have Done")
    )

if img is not None and uploaded_file != "Wait" and uploaded_file is not None:
    cloth_img = img.convert("RGB").resize((768, 1024))
    #pidx = 44
    pidx_index_list = [44, 84, 67]
    poses = []
    for idx in range(len(pidx_index_list)):
        poses.append(
            take_all_feature_with_dataset(
                cloth_img, pidx_index_list[idx], only_show_human=True)
        )

    col1, col2, col3 = st.columns(3)

    with col1:
        st.header("Pose 0")
        pose_img = poses[0]
        st.image(pose_img)
        b = pred_func(cloth_img, pidx_index_list[0])
        st.image(b)

    with col2:
        st.header("Pose 1")
        pose_img = poses[1]
        st.image(pose_img)
        b = pred_func(cloth_img, pidx_index_list[1])
        st.image(b)

    with col3:
        st.header("Pose 2")
        pose_img = poses[2]
        st.image(pose_img)
        b = pred_func(cloth_img, pidx_index_list[2])
        st.image(b)
cp_dataset_test.py
ADDED
@@ -0,0 +1,264 @@
import torch
import torch.utils.data as data
import torchvision.transforms as transforms

from PIL import Image, ImageDraw

import os.path as osp
import numpy as np
import json


class CPDatasetTest(data.Dataset):
    """
    Test Dataset for CP-VTON.
    """
    def __init__(self, opt):
        super(CPDatasetTest, self).__init__()
        # base setting
        self.opt = opt
        self.root = opt.dataroot
        self.datamode = opt.datamode  # train or test or self-defined
        self.data_list = opt.data_list
        self.fine_height = opt.fine_height
        self.fine_width = opt.fine_width
        self.semantic_nc = opt.semantic_nc
        self.data_path = osp.join(opt.dataroot, opt.datamode)
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

        # load data list
        im_names = []
        c_names = []
        with open(osp.join(opt.dataroot, opt.data_list), 'r') as f:
            for line in f.readlines():
                im_name, c_name = line.strip().split()
                im_names.append(im_name)
                c_names.append(c_name)

        self.im_names = im_names
        self.c_names = dict()
        self.c_names['paired'] = im_names
        self.c_names['unpaired'] = c_names

    def name(self):
        return "CPDataset"

    def get_agnostic(self, im, im_parse, pose_data):
        parse_array = np.array(im_parse)
        parse_head = ((parse_array == 4).astype(np.float32) +
                      (parse_array == 13).astype(np.float32))
        parse_lower = ((parse_array == 9).astype(np.float32) +
                       (parse_array == 12).astype(np.float32) +
                       (parse_array == 16).astype(np.float32) +
                       (parse_array == 17).astype(np.float32) +
                       (parse_array == 18).astype(np.float32) +
                       (parse_array == 19).astype(np.float32))

        agnostic = im.copy()
        agnostic_draw = ImageDraw.Draw(agnostic)

        length_a = np.linalg.norm(pose_data[5] - pose_data[2])
        length_b = np.linalg.norm(pose_data[12] - pose_data[9])
        point = (pose_data[9] + pose_data[12]) / 2
        pose_data[9] = point + (pose_data[9] - point) / length_b * length_a
        pose_data[12] = point + (pose_data[12] - point) / length_b * length_a

        r = int(length_a / 16) + 1

        # mask torso
        for i in [9, 12]:
            pointx, pointy = pose_data[i]
            agnostic_draw.ellipse((pointx-r*3, pointy-r*6, pointx+r*3, pointy+r*6), 'gray', 'gray')
        agnostic_draw.line([tuple(pose_data[i]) for i in [2, 9]], 'gray', width=r*6)
        agnostic_draw.line([tuple(pose_data[i]) for i in [5, 12]], 'gray', width=r*6)
        agnostic_draw.line([tuple(pose_data[i]) for i in [9, 12]], 'gray', width=r*12)
        agnostic_draw.polygon([tuple(pose_data[i]) for i in [2, 5, 12, 9]], 'gray', 'gray')

        # mask neck
        pointx, pointy = pose_data[1]
        agnostic_draw.rectangle((pointx-r*5, pointy-r*9, pointx+r*5, pointy), 'gray', 'gray')

        # mask arms
        agnostic_draw.line([tuple(pose_data[i]) for i in [2, 5]], 'gray', width=r*12)
        for i in [2, 5]:
            pointx, pointy = pose_data[i]
            agnostic_draw.ellipse((pointx-r*5, pointy-r*6, pointx+r*5, pointy+r*6), 'gray', 'gray')
        for i in [3, 4, 6, 7]:
            if (pose_data[i-1, 0] == 0.0 and pose_data[i-1, 1] == 0.0) or (pose_data[i, 0] == 0.0 and pose_data[i, 1] == 0.0):
                continue
            agnostic_draw.line([tuple(pose_data[j]) for j in [i - 1, i]], 'gray', width=r*10)
            pointx, pointy = pose_data[i]
            agnostic_draw.ellipse((pointx-r*5, pointy-r*5, pointx+r*5, pointy+r*5), 'gray', 'gray')

        for parse_id, pose_ids in [(14, [5, 6, 7]), (15, [2, 3, 4])]:
            mask_arm = Image.new('L', (768, 1024), 'white')
            mask_arm_draw = ImageDraw.Draw(mask_arm)
            pointx, pointy = pose_data[pose_ids[0]]
            mask_arm_draw.ellipse((pointx-r*5, pointy-r*6, pointx+r*5, pointy+r*6), 'black', 'black')
            for i in pose_ids[1:]:
                if (pose_data[i-1, 0] == 0.0 and pose_data[i-1, 1] == 0.0) or (pose_data[i, 0] == 0.0 and pose_data[i, 1] == 0.0):
                    continue
                mask_arm_draw.line([tuple(pose_data[j]) for j in [i - 1, i]], 'black', width=r*10)
                pointx, pointy = pose_data[i]
                if i != pose_ids[-1]:
                    mask_arm_draw.ellipse((pointx-r*5, pointy-r*5, pointx+r*5, pointy+r*5), 'black', 'black')
            mask_arm_draw.ellipse((pointx-r*4, pointy-r*4, pointx+r*4, pointy+r*4), 'black', 'black')

            parse_arm = (np.array(mask_arm) / 255) * (parse_array == parse_id).astype(np.float32)
            agnostic.paste(im, None, Image.fromarray(np.uint8(parse_arm * 255), 'L'))

        agnostic.paste(im, None, Image.fromarray(np.uint8(parse_head * 255), 'L'))
        agnostic.paste(im, None, Image.fromarray(np.uint8(parse_lower * 255), 'L'))
        return agnostic

    def __getitem__(self, index):
        im_name = self.im_names[index]
        c_name = {}
        c = {}
        cm = {}
        for key in self.c_names:
            c_name[key] = self.c_names[key][index]
            c[key] = Image.open(osp.join(self.data_path, 'cloth', c_name[key])).convert('RGB')
            c[key] = transforms.Resize(self.fine_width, interpolation=2)(c[key])
            cm[key] = Image.open(osp.join(self.data_path, 'cloth-mask', c_name[key]))
            cm[key] = transforms.Resize(self.fine_width, interpolation=0)(cm[key])

            c[key] = self.transform(c[key])  # [-1,1]
            cm_array = np.array(cm[key])
            cm_array = (cm_array >= 128).astype(np.float32)
            cm[key] = torch.from_numpy(cm_array)  # [0,1]
            cm[key].unsqueeze_(0)

        # person image
        im_pil_big = Image.open(osp.join(self.data_path, 'image', im_name))
        im_pil = transforms.Resize(self.fine_width, interpolation=2)(im_pil_big)

        im = self.transform(im_pil)

        # load parsing image
        parse_name = im_name.replace('.jpg', '.png')
        im_parse_pil_big = Image.open(osp.join(self.data_path, 'image-parse-v3', parse_name))
        im_parse_pil = transforms.Resize(self.fine_width, interpolation=0)(im_parse_pil_big)
        parse = torch.from_numpy(np.array(im_parse_pil)[None]).long()
        im_parse = self.transform(im_parse_pil.convert('RGB'))

        labels = {
            0: ['background', [0, 10]],
            1: ['hair', [1, 2]],
            2: ['face', [4, 13]],
            3: ['upper', [5, 6, 7]],
            4: ['bottom', [9, 12]],
            5: ['left_arm', [14]],
            6: ['right_arm', [15]],
            7: ['left_leg', [16]],
            8: ['right_leg', [17]],
            9: ['left_shoe', [18]],
            10: ['right_shoe', [19]],
            11: ['socks', [8]],
            12: ['noise', [3, 11]]
        }

        parse_map = torch.FloatTensor(20, self.fine_height, self.fine_width).zero_()
        parse_map = parse_map.scatter_(0, parse, 1.0)
        new_parse_map = torch.FloatTensor(self.semantic_nc, self.fine_height, self.fine_width).zero_()

        for i in range(len(labels)):
            for label in labels[i][1]:
                new_parse_map[i] += parse_map[label]

        parse_onehot = torch.FloatTensor(1, self.fine_height, self.fine_width).zero_()
        for i in range(len(labels)):
            for label in labels[i][1]:
                parse_onehot[0] += parse_map[label] * i

        # load image-parse-agnostic
        image_parse_agnostic = Image.open(osp.join(self.data_path, 'image-parse-agnostic-v3.2', parse_name))
        image_parse_agnostic = transforms.Resize(self.fine_width, interpolation=0)(image_parse_agnostic)
        parse_agnostic = torch.from_numpy(np.array(image_parse_agnostic)[None]).long()
        image_parse_agnostic = self.transform(image_parse_agnostic.convert('RGB'))

        parse_agnostic_map = torch.FloatTensor(20, self.fine_height, self.fine_width).zero_()
        parse_agnostic_map = parse_agnostic_map.scatter_(0, parse_agnostic, 1.0)
        new_parse_agnostic_map = torch.FloatTensor(self.semantic_nc, self.fine_height, self.fine_width).zero_()
        for i in range(len(labels)):
            for label in labels[i][1]:
                new_parse_agnostic_map[i] += parse_agnostic_map[label]

        # parse cloth & parse cloth mask
        pcm = new_parse_map[3:4]
        im_c = im * pcm + (1 - pcm)

        # load pose points
        pose_name = im_name.replace('.jpg', '_rendered.png')
        pose_map = Image.open(osp.join(self.data_path, 'openpose_img', pose_name))
        pose_map = transforms.Resize(self.fine_width, interpolation=2)(pose_map)
        pose_map = self.transform(pose_map)  # [-1,1]

        pose_name = im_name.replace('.jpg', '_keypoints.json')
        with open(osp.join(self.data_path, 'openpose_json', pose_name), 'r') as f:
            pose_label = json.load(f)
            pose_data = pose_label['people'][0]['pose_keypoints_2d']
            pose_data = np.array(pose_data)
            pose_data = pose_data.reshape((-1, 3))[:, :2]

        # load densepose
        densepose_name = im_name.replace('image', 'image-densepose')
        densepose_map = Image.open(osp.join(self.data_path, 'image-densepose', densepose_name))
        densepose_map = transforms.Resize(self.fine_width, interpolation=2)(densepose_map)
        densepose_map = self.transform(densepose_map)  # [-1,1]

        agnostic = self.get_agnostic(im_pil_big, im_parse_pil_big, pose_data)
        agnostic = transforms.Resize(self.fine_width, interpolation=2)(agnostic)
        agnostic = self.transform(agnostic)

        result = {
            'c_name': c_name,  # for visualization
            'im_name': im_name,  # for visualization or ground truth
            # input 1 (cloth flow)
            'cloth': c,  # for input
            'cloth_mask': cm,  # for input
            # input 2 (seg net)
            'parse_agnostic': new_parse_agnostic_map,
            'densepose': densepose_map,
            'pose': pose_map,  # for conditioning
            # GT
            'parse_onehot': parse_onehot,  # Cross Entropy
            'parse': new_parse_map,  # GAN Loss real
            'pcm': pcm,  # L1 Loss & vis
            'parse_cloth': im_c,  # VGG Loss & vis
            # visualization
            'image': im,  # for visualization
            'agnostic': agnostic
        }

        return result

    def __len__(self):
        return len(self.im_names)


class CPDataLoader(object):
    def __init__(self, opt, dataset):
        super(CPDataLoader, self).__init__()
        if opt.shuffle:
            train_sampler = torch.utils.data.sampler.RandomSampler(dataset)
        else:
            train_sampler = None

        self.data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=opt.batch_size, shuffle=(train_sampler is None),
            num_workers=opt.workers, pin_memory=True, drop_last=True, sampler=train_sampler)
        self.dataset = dataset
        self.data_iter = self.data_loader.__iter__()

    def next_batch(self):
        try:
            batch = self.data_iter.__next__()
        except StopIteration:
            self.data_iter = self.data_loader.__iter__()
            batch = self.data_iter.__next__()

        return batch
network_generator.py
ADDED
@@ -0,0 +1,433 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
from torch.nn.utils import spectral_norm
import numpy as np


class BaseNetwork(nn.Module):
    def __init__(self):
        super(BaseNetwork, self).__init__()

    def print_network(self):
        num_params = 0
        for param in self.parameters():
            num_params += param.numel()
        print("Network [{}] was created. Total number of parameters: {:.1f} million. "
              "To see the architecture, do print(network).".format(self.__class__.__name__, num_params / 1000000))

    def init_weights(self, init_type='normal', gain=0.02):
        def init_func(m):
            classname = m.__class__.__name__
            if 'BatchNorm2d' in classname:
                if hasattr(m, 'weight') and m.weight is not None:
                    init.normal_(m.weight.data, 1.0, gain)
                if hasattr(m, 'bias') and m.bias is not None:
                    init.constant_(m.bias.data, 0.0)
            elif ('Conv' in classname or 'Linear' in classname) and hasattr(m, 'weight'):
                if init_type == 'normal':
                    init.normal_(m.weight.data, 0.0, gain)
                elif init_type == 'xavier':
                    init.xavier_normal_(m.weight.data, gain=gain)
                elif init_type == 'xavier_uniform':
                    init.xavier_uniform_(m.weight.data, gain=1.0)
                elif init_type == 'kaiming':
                    init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
                elif init_type == 'orthogonal':
                    init.orthogonal_(m.weight.data, gain=gain)
                elif init_type == 'none':  # uses pytorch's default init method
                    m.reset_parameters()
                else:
                    raise NotImplementedError("initialization method '{}' is not implemented".format(init_type))
                if hasattr(m, 'bias') and m.bias is not None:
                    init.constant_(m.bias.data, 0.0)

        self.apply(init_func)

    def forward(self, *inputs):
        pass


class MaskNorm(nn.Module):
    def __init__(self, norm_nc):
        super(MaskNorm, self).__init__()

        self.norm_layer = nn.InstanceNorm2d(norm_nc, affine=False)

    def normalize_region(self, region, mask):
        b, c, h, w = region.size()

        num_pixels = mask.sum((2, 3), keepdim=True)  # size: (b, 1, 1, 1)
        num_pixels[num_pixels == 0] = 1
        mu = region.sum((2, 3), keepdim=True) / num_pixels  # size: (b, c, 1, 1)

        normalized_region = self.norm_layer(region + (1 - mask) * mu)
        return normalized_region * torch.sqrt(num_pixels / (h * w))

    def forward(self, x, mask):
        mask = mask.detach()
        normalized_foreground = self.normalize_region(x * mask, mask)
        normalized_background = self.normalize_region(x * (1 - mask), 1 - mask)
        return normalized_foreground + normalized_background


class SPADENorm(nn.Module):
    def __init__(self, opt, norm_type, norm_nc, label_nc):
        super(SPADENorm, self).__init__()
        self.param_opt = opt
        self.noise_scale = nn.Parameter(torch.zeros(norm_nc))

        assert norm_type.startswith('alias')
        param_free_norm_type = norm_type[len('alias'):]
        if param_free_norm_type == 'batch':
            self.param_free_norm = nn.BatchNorm2d(norm_nc, affine=False)
        elif param_free_norm_type == 'instance':
            self.param_free_norm = nn.InstanceNorm2d(norm_nc, affine=False)
        elif param_free_norm_type == 'mask':
            self.param_free_norm = MaskNorm(norm_nc)
        else:
            raise ValueError(
                "'{}' is not a recognized parameter-free normalization type in SPADENorm".format(param_free_norm_type)
            )

        nhidden = 128
        ks = 3
        pw = ks // 2
        self.conv_shared = nn.Sequential(nn.Conv2d(label_nc, nhidden, kernel_size=ks, padding=pw), nn.ReLU())
        self.conv_gamma = nn.Conv2d(nhidden, norm_nc, kernel_size=ks, padding=pw)
        self.conv_beta = nn.Conv2d(nhidden, norm_nc, kernel_size=ks, padding=pw)

    def forward(self, x, seg, misalign_mask=None):
        # Part 1. Generate parameter-free normalized activations.
        b, c, h, w = x.size()
        if self.param_opt.cuda:
            noise = (torch.randn(b, w, h, 1).cuda() * self.noise_scale).transpose(1, 3)
        else:
            noise = (torch.randn(b, w, h, 1) * self.noise_scale).transpose(1, 3)

        if misalign_mask is None:
            normalized = self.param_free_norm(x + noise)
        else:
            normalized = self.param_free_norm(x + noise, misalign_mask)

        # Part 2. Produce affine parameters conditioned on the segmentation map.
        actv = self.conv_shared(seg)
        gamma = self.conv_gamma(actv)
        beta = self.conv_beta(actv)

        # Apply the affine parameters.
        output = normalized * (1 + gamma) + beta
        return output


class SPADEResBlock(nn.Module):
    def __init__(self, opt, input_nc, output_nc, use_mask_norm=True):
        super(SPADEResBlock, self).__init__()
        self.param_opt = opt
        self.learned_shortcut = (input_nc != output_nc)
        middle_nc = min(input_nc, output_nc)

        self.conv_0 = nn.Conv2d(input_nc, middle_nc, kernel_size=3, padding=1)
        self.conv_1 = nn.Conv2d(middle_nc, output_nc, kernel_size=3, padding=1)
        if self.learned_shortcut:
            self.conv_s = nn.Conv2d(input_nc, output_nc, kernel_size=1, bias=False)

        subnorm_type = opt.norm_G
        if subnorm_type.startswith('spectral'):
            subnorm_type = subnorm_type[len('spectral'):]
            self.conv_0 = spectral_norm(self.conv_0)
            self.conv_1 = spectral_norm(self.conv_1)
            if self.learned_shortcut:
                self.conv_s = spectral_norm(self.conv_s)

        gen_semantic_nc = opt.gen_semantic_nc
        if use_mask_norm:
            subnorm_type = 'aliasmask'
            gen_semantic_nc = gen_semantic_nc + 1

        self.norm_0 = SPADENorm(opt, subnorm_type, input_nc, gen_semantic_nc)
        self.norm_1 = SPADENorm(opt, subnorm_type, middle_nc, gen_semantic_nc)
        if self.learned_shortcut:
            self.norm_s = SPADENorm(opt, subnorm_type, input_nc, gen_semantic_nc)

        self.relu = nn.LeakyReLU(0.2)

    def shortcut(self, x, seg, misalign_mask):
        if self.learned_shortcut:
            return self.conv_s(self.norm_s(x, seg, misalign_mask))
        else:
            return x

    def forward(self, x, seg, misalign_mask=None):
        seg = F.interpolate(seg, size=x.size()[2:], mode='nearest')
        if misalign_mask is not None:
            misalign_mask = F.interpolate(misalign_mask, size=x.size()[2:], mode='nearest')

        x_s = self.shortcut(x, seg, misalign_mask)

        dx = self.conv_0(self.relu(self.norm_0(x, seg, misalign_mask)))
        dx = self.conv_1(self.relu(self.norm_1(dx, seg, misalign_mask)))
        output = x_s + dx
        return output


class SPADEGenerator(BaseNetwork):
    def __init__(self, opt, input_nc):
        super(SPADEGenerator, self).__init__()
        self.num_upsampling_layers = opt.num_upsampling_layers
        self.param_opt = opt
        self.sh, self.sw = self.compute_latent_vector_size(opt)

        nf = opt.ngf
        self.conv_0 = nn.Conv2d(input_nc, nf * 16, kernel_size=3, padding=1)
        for i in range(1, 8):
            self.add_module('conv_{}'.format(i), nn.Conv2d(input_nc, 16, kernel_size=3, padding=1))

        self.head_0 = SPADEResBlock(opt, nf * 16, nf * 16, use_mask_norm=False)

        self.G_middle_0 = SPADEResBlock(opt, nf * 16 + 16, nf * 16, use_mask_norm=False)
        self.G_middle_1 = SPADEResBlock(opt, nf * 16 + 16, nf * 16, use_mask_norm=False)

        self.up_0 = SPADEResBlock(opt, nf * 16 + 16, nf * 8, use_mask_norm=False)
        self.up_1 = SPADEResBlock(opt, nf * 8 + 16, nf * 4, use_mask_norm=False)
        self.up_2 = SPADEResBlock(opt, nf * 4 + 16, nf * 2, use_mask_norm=False)
        self.up_3 = SPADEResBlock(opt, nf * 2 + 16, nf * 1, use_mask_norm=False)
        if self.num_upsampling_layers == 'most':
            self.up_4 = SPADEResBlock(opt, nf * 1 + 16, nf // 2, use_mask_norm=False)
            nf = nf // 2

        self.conv_img = nn.Conv2d(nf, 3, kernel_size=3, padding=1)

        self.up = nn.Upsample(scale_factor=2, mode='nearest')
        self.relu = nn.LeakyReLU(0.2)
        self.tanh = nn.Tanh()

    def compute_latent_vector_size(self, opt):
        if self.num_upsampling_layers == 'normal':
            num_up_layers = 5
        elif self.num_upsampling_layers == 'more':
            num_up_layers = 6
        elif self.num_upsampling_layers == 'most':
            num_up_layers = 7
        else:
            raise ValueError("opt.num_upsampling_layers '{}' is not recognized".format(self.num_upsampling_layers))

        sh = opt.fine_height // 2**num_up_layers
        sw = opt.fine_width // 2**num_up_layers
        return sh, sw

    def forward(self, x, seg):
        samples = [F.interpolate(x, size=(self.sh * 2**i, self.sw * 2**i), mode='nearest') for i in range(8)]
        features = [self._modules['conv_{}'.format(i)](samples[i]) for i in range(8)]

        x = self.head_0(features[0], seg)
        x = self.up(x)
        x = self.G_middle_0(torch.cat((x, features[1]), 1), seg)
        if self.num_upsampling_layers in ['more', 'most']:
            x = self.up(x)
        x = self.G_middle_1(torch.cat((x, features[2]), 1), seg)

        x = self.up(x)
        x = self.up_0(torch.cat((x, features[3]), 1), seg)
        x = self.up(x)
        x = self.up_1(torch.cat((x, features[4]), 1), seg)
        x = self.up(x)
        x = self.up_2(torch.cat((x, features[5]), 1), seg)
        x = self.up(x)
        x = self.up_3(torch.cat((x, features[6]), 1), seg)
        if self.num_upsampling_layers == 'most':
            x = self.up(x)
            x = self.up_4(torch.cat((x, features[7]), 1), seg)

        x = self.conv_img(self.relu(x))
        return self.tanh(x)
########################################################################

########################################################################

class NLayerDiscriminator(BaseNetwork):

    def __init__(self, opt):
        super().__init__()
        self.no_ganFeat_loss = opt.no_ganFeat_loss
        nf = opt.ndf

        kw = 4
        pw = int(np.ceil((kw - 1.0) / 2))
        norm_layer = get_nonspade_norm_layer(opt.norm_D)

        input_nc = opt.gen_semantic_nc + 3
        # input_nc = opt.gen_semantic_nc + 13
        sequence = [[nn.Conv2d(input_nc, nf, kernel_size=kw, stride=2, padding=pw),
                     nn.LeakyReLU(0.2, False)]]

        for n in range(1, opt.n_layers_D):
            nf_prev = nf
            nf = min(nf * 2, 512)
            sequence += [[norm_layer(nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=pw)),
                          nn.LeakyReLU(0.2, False)]]

        sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=pw)]]

        # We divide the layers into groups to extract intermediate layer outputs
        for n in range(len(sequence)):
            self.add_module('model' + str(n), nn.Sequential(*sequence[n]))

    def forward(self, input):
        results = [input]
        for submodel in self.children():
            intermediate_output = submodel(results[-1])
            results.append(intermediate_output)

        get_intermediate_features = not self.no_ganFeat_loss
        if get_intermediate_features:
            return results[1:]
        else:
            return results[-1]


class MultiscaleDiscriminator(BaseNetwork):

    def __init__(self, opt):
        super().__init__()
        self.no_ganFeat_loss = opt.no_ganFeat_loss

        for i in range(opt.num_D):
            subnetD = NLayerDiscriminator(opt)
            self.add_module('discriminator_%d' % i, subnetD)

    def downsample(self, input):
        return F.avg_pool2d(input, kernel_size=3, stride=2, padding=[1, 1], count_include_pad=False)

    # Returns list of lists of discriminator outputs.
    # The final result is of size opt.num_D x opt.n_layers_D
    def forward(self, input):
        result = []
        get_intermediate_features = not self.no_ganFeat_loss
        for name, D in self.named_children():
            out = D(input)
            if not get_intermediate_features:
                out = [out]
            result.append(out)
            input = self.downsample(input)

        return result


class GANLoss(nn.Module):
    def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0, tensor=torch.FloatTensor):
        super(GANLoss, self).__init__()
        self.real_label = target_real_label
        self.fake_label = target_fake_label
        self.real_label_tensor = None
        self.fake_label_tensor = None
        self.zero_tensor = None
        self.Tensor = tensor
        self.gan_mode = gan_mode
        if gan_mode == 'ls':
            pass
        elif gan_mode == 'original':
            pass
        elif gan_mode == 'w':
            pass
        elif gan_mode == 'hinge':
            pass
        else:
            raise ValueError('Unexpected gan_mode {}'.format(gan_mode))

    def get_target_tensor(self, input, target_is_real):
        if target_is_real:
            if self.real_label_tensor is None:
                self.real_label_tensor = self.Tensor(1).fill_(self.real_label)
                self.real_label_tensor.requires_grad_(False)
            return self.real_label_tensor.expand_as(input)
        else:
            if self.fake_label_tensor is None:
                self.fake_label_tensor = self.Tensor(1).fill_(self.fake_label)
                self.fake_label_tensor.requires_grad_(False)
            return self.fake_label_tensor.expand_as(input)

    def get_zero_tensor(self, input):
        if self.zero_tensor is None:
            self.zero_tensor = self.Tensor(1).fill_(0)
            self.zero_tensor.requires_grad_(False)
        return self.zero_tensor.expand_as(input)

    def loss(self, input, target_is_real, for_discriminator=True):
        if self.gan_mode == 'original':  # cross entropy loss
            target_tensor = self.get_target_tensor(input, target_is_real)
            loss = F.binary_cross_entropy_with_logits(input, target_tensor)
            return loss
        elif self.gan_mode == 'ls':
            target_tensor = self.get_target_tensor(input, target_is_real)
            return F.mse_loss(input, target_tensor)
        elif self.gan_mode == 'hinge':
            if for_discriminator:
                if target_is_real:
                    minval = torch.min(input - 1, self.get_zero_tensor(input))
                    loss = -torch.mean(minval)
                else:
                    minval = torch.min(-input - 1, self.get_zero_tensor(input))
                    loss = -torch.mean(minval)
            else:
                assert target_is_real, "The generator's hinge loss must be aiming for real"
                loss = -torch.mean(input)
            return loss
        else:
            # wgan
            if target_is_real:
                return -input.mean()
            else:
                return input.mean()

    def __call__(self, input, target_is_real, for_discriminator=True):
        # computing loss is a bit complicated because |input| may not be
        # a tensor, but list of tensors in case of multiscale discriminator
        if isinstance(input, list):
            loss = 0
            for pred_i in input:
                if isinstance(pred_i, list):
                    pred_i = pred_i[-1]
                loss_tensor = self.loss(pred_i, target_is_real, for_discriminator)
                bs = 1 if len(loss_tensor.size()) == 0 else loss_tensor.size(0)
                new_loss = torch.mean(loss_tensor.view(bs, -1), dim=1)
                loss += new_loss
            return loss / len(input)
        else:
            return self.loss(input, target_is_real, for_discriminator)


def get_nonspade_norm_layer(norm_type='instance'):
    def get_out_channel(layer):
        if hasattr(layer, 'out_channels'):
            return getattr(layer, 'out_channels')
        return layer.weight.size(0)

    def add_norm_layer(layer):
        nonlocal norm_type
        if norm_type.startswith('spectral'):
            layer = spectral_norm(layer)
        subnorm_type = norm_type[len('spectral'):]

        if subnorm_type == 'none' or len(subnorm_type) == 0:
            return layer

        # remove bias in the previous layer, which is meaningless
        # since it has no effect after normalization
        if getattr(layer, 'bias', None) is not None:
            delattr(layer, 'bias')
            layer.register_parameter('bias', None)

        if subnorm_type == 'batch':
            norm_layer = nn.BatchNorm2d(get_out_channel(layer), affine=True)
        # elif subnorm_type == 'sync_batch':
        #     norm_layer = SynchronizedBatchNorm2d(get_out_channel(layer), affine=True)
        elif subnorm_type == 'instance':
            norm_layer = nn.InstanceNorm2d(get_out_channel(layer), affine=False)
        else:
            raise ValueError('normalization layer %s is not recognized' % subnorm_type)

        return nn.Sequential(layer, norm_layer)

    return add_norm_layer
networks.py
ADDED
@@ -0,0 +1,453 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import models
import os
from torch.nn.utils import spectral_norm
import numpy as np

import functools


class ConditionGenerator(nn.Module):
    def __init__(self, opt, input1_nc, input2_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d):
        super(ConditionGenerator, self).__init__()
        self.warp_feature = opt.warp_feature
        self.out_layer_opt = opt.out_layer

        self.ClothEncoder = nn.Sequential(
            ResBlock(input1_nc, ngf, norm_layer=norm_layer, scale='down'),  # 128
            ResBlock(ngf, ngf * 2, norm_layer=norm_layer, scale='down'),  # 64
            ResBlock(ngf * 2, ngf * 4, norm_layer=norm_layer, scale='down'),  # 32
            ResBlock(ngf * 4, ngf * 4, norm_layer=norm_layer, scale='down'),  # 16
            ResBlock(ngf * 4, ngf * 4, norm_layer=norm_layer, scale='down')  # 8
        )

        self.PoseEncoder = nn.Sequential(
            ResBlock(input2_nc, ngf, norm_layer=norm_layer, scale='down'),
            ResBlock(ngf, ngf * 2, norm_layer=norm_layer, scale='down'),
            ResBlock(ngf * 2, ngf * 4, norm_layer=norm_layer, scale='down'),
            ResBlock(ngf * 4, ngf * 4, norm_layer=norm_layer, scale='down'),
            ResBlock(ngf * 4, ngf * 4, norm_layer=norm_layer, scale='down')
        )

        self.conv = ResBlock(ngf * 4, ngf * 8, norm_layer=norm_layer, scale='same')

        if opt.warp_feature == 'T1':
            # in_nc -> skip connection + T1, T2 channels
            self.SegDecoder = nn.Sequential(
                ResBlock(ngf * 8, ngf * 4, norm_layer=norm_layer, scale='up'),  # 16
                ResBlock(ngf * 4 * 2 + ngf * 4, ngf * 4, norm_layer=norm_layer, scale='up'),  # 32
                ResBlock(ngf * 4 * 2 + ngf * 4, ngf * 2, norm_layer=norm_layer, scale='up'),  # 64
                ResBlock(ngf * 2 * 2 + ngf * 4, ngf, norm_layer=norm_layer, scale='up'),  # 128
                ResBlock(ngf * 1 * 2 + ngf * 4, ngf, norm_layer=norm_layer, scale='up')  # 256
            )
        if opt.warp_feature == 'encoder':
            # in_nc -> [x, skip_connection, warped_cloth_encoder_feature(E1)]
            self.SegDecoder = nn.Sequential(
                ResBlock(ngf * 8, ngf * 4, norm_layer=norm_layer, scale='up'),  # 16
                ResBlock(ngf * 4 * 3, ngf * 4, norm_layer=norm_layer, scale='up'),  # 32
                ResBlock(ngf * 4 * 3, ngf * 2, norm_layer=norm_layer, scale='up'),  # 64
                ResBlock(ngf * 2 * 3, ngf, norm_layer=norm_layer, scale='up'),  # 128
                ResBlock(ngf * 1 * 3, ngf, norm_layer=norm_layer, scale='up')  # 256
            )
        if opt.out_layer == 'relu':
            self.out_layer = ResBlock(ngf + input1_nc + input2_nc, output_nc, norm_layer=norm_layer, scale='same')
        if opt.out_layer == 'conv':
            self.out_layer = nn.Sequential(
                ResBlock(ngf + input1_nc + input2_nc, ngf, norm_layer=norm_layer, scale='same'),
                nn.Conv2d(ngf, output_nc, kernel_size=1, bias=True)
            )

        # Cloth Conv 1x1
        self.conv1 = nn.Sequential(
            nn.Conv2d(ngf, ngf * 4, kernel_size=1, bias=True),
            nn.Conv2d(ngf * 2, ngf * 4, kernel_size=1, bias=True),
            nn.Conv2d(ngf * 4, ngf * 4, kernel_size=1, bias=True),
            nn.Conv2d(ngf * 4, ngf * 4, kernel_size=1, bias=True),
        )

        # Person Conv 1x1
        self.conv2 = nn.Sequential(
            nn.Conv2d(ngf, ngf * 4, kernel_size=1, bias=True),
            nn.Conv2d(ngf * 2, ngf * 4, kernel_size=1, bias=True),
            nn.Conv2d(ngf * 4, ngf * 4, kernel_size=1, bias=True),
            nn.Conv2d(ngf * 4, ngf * 4, kernel_size=1, bias=True),
        )

        self.flow_conv = nn.ModuleList([
            nn.Conv2d(ngf * 8, 2, kernel_size=3, stride=1, padding=1, bias=True),
            nn.Conv2d(ngf * 8, 2, kernel_size=3, stride=1, padding=1, bias=True),
            nn.Conv2d(ngf * 8, 2, kernel_size=3, stride=1, padding=1, bias=True),
            nn.Conv2d(ngf * 8, 2, kernel_size=3, stride=1, padding=1, bias=True),
            nn.Conv2d(ngf * 8, 2, kernel_size=3, stride=1, padding=1, bias=True),
        ])

        self.bottleneck = nn.Sequential(
            nn.Sequential(nn.Conv2d(ngf * 4, ngf * 4, kernel_size=3, stride=1, padding=1, bias=True), nn.ReLU()),
            nn.Sequential(nn.Conv2d(ngf * 4, ngf * 4, kernel_size=3, stride=1, padding=1, bias=True), nn.ReLU()),
            nn.Sequential(nn.Conv2d(ngf * 2, ngf * 4, kernel_size=3, stride=1, padding=1, bias=True), nn.ReLU()),
            nn.Sequential(nn.Conv2d(ngf, ngf * 4, kernel_size=3, stride=1, padding=1, bias=True), nn.ReLU()),
        )

    def normalize(self, x):
        return x

    def forward(self, opt, input1, input2, upsample='bilinear'):
        E1_list = []
        E2_list = []
        flow_list = []

        # Feature Pyramid Network
        for i in range(5):
            if i == 0:
                E1_list.append(self.ClothEncoder[i](input1))
                E2_list.append(self.PoseEncoder[i](input2))
            else:
                E1_list.append(self.ClothEncoder[i](E1_list[i - 1]))
                E2_list.append(self.PoseEncoder[i](E2_list[i - 1]))

        # Compute ClothFlow
        for i in range(5):
            N, _, iH, iW = E1_list[4 - i].size()
            grid = make_grid(N, iH, iW, opt)

            if i == 0:
                T1 = E1_list[4 - i]  # (ngf * 4) x 8 x 6
                T2 = E2_list[4 - i]
                E4 = torch.cat([T1, T2], 1)

                flow = self.flow_conv[i](self.normalize(E4)).permute(0, 2, 3, 1)
                flow_list.append(flow)

                x = self.conv(T2)
                x = self.SegDecoder[i](x)
            else:
                T1 = F.interpolate(T1, scale_factor=2, mode=upsample) + self.conv1[4 - i](E1_list[4 - i])
                T2 = F.interpolate(T2, scale_factor=2, mode=upsample) + self.conv2[4 - i](E2_list[4 - i])

                flow = F.interpolate(flow_list[i - 1].permute(0, 3, 1, 2), scale_factor=2, mode=upsample).permute(0, 2, 3, 1)  # upsample (n-1)-th flow
                flow_norm = torch.cat([flow[:, :, :, 0:1] / ((iW / 2 - 1.0) / 2.0), flow[:, :, :, 1:2] / ((iH / 2 - 1.0) / 2.0)], 3)
                warped_T1 = F.grid_sample(T1, flow_norm + grid, padding_mode='border')

                flow = flow + self.flow_conv[i](self.normalize(torch.cat([warped_T1, self.bottleneck[i - 1](x)], 1))).permute(0, 2, 3, 1)  # F(n)
                flow_list.append(flow)

                if self.warp_feature == 'T1':
                    x = self.SegDecoder[i](torch.cat([x, E2_list[4 - i], warped_T1], 1))
                if self.warp_feature == 'encoder':
                    warped_E1 = F.grid_sample(E1_list[4 - i], flow_norm + grid, padding_mode='border')
                    x = self.SegDecoder[i](torch.cat([x, E2_list[4 - i], warped_E1], 1))

        N, _, iH, iW = input1.size()
        grid = make_grid(N, iH, iW, opt)

        flow = F.interpolate(flow_list[-1].permute(0, 3, 1, 2), scale_factor=2, mode=upsample).permute(0, 2, 3, 1)
        flow_norm = torch.cat([flow[:, :, :, 0:1] / ((iW / 2 - 1.0) / 2.0), flow[:, :, :, 1:2] / ((iH / 2 - 1.0) / 2.0)], 3)
        warped_input1 = F.grid_sample(input1, flow_norm + grid, padding_mode='border')

        x = self.out_layer(torch.cat([x, input2, warped_input1], 1))

        warped_c = warped_input1[:, :-1, :, :]
        warped_cm = warped_input1[:, -1:, :, :]

        return flow_list, x, warped_c, warped_cm
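A minimal smoke test of the two-branch condition generator above, on random tensors at the 256 x 192 working resolution used later in this upload; the opt namespace here is a hand-rolled stand-in for the real argparse options:

from types import SimpleNamespace

opt = SimpleNamespace(warp_feature='T1', out_layer='relu', cuda=False)
tocg = ConditionGenerator(opt, input1_nc=4, input2_nc=16, output_nc=13, ngf=96)
tocg.eval()

cloth = torch.randn(1, 4, 256, 192)    # cloth RGB + cloth mask
person = torch.randn(1, 16, 256, 192)  # parse-agnostic (13 ch) + densepose (3 ch)
with torch.no_grad():
    flow_list, seg, warped_c, warped_cm = tocg(opt, cloth, person)
# seg: (1, 13, 256, 192), warped_c: (1, 3, 256, 192), warped_cm: (1, 1, 256, 192)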
def make_grid(N, iH, iW, opt):
    grid_x = torch.linspace(-1.0, 1.0, iW).view(1, 1, iW, 1).expand(N, iH, -1, -1)
    grid_y = torch.linspace(-1.0, 1.0, iH).view(1, iH, 1, 1).expand(N, -1, iW, -1)
    if opt.cuda:
        grid = torch.cat([grid_x, grid_y], 3).cuda()
    else:
        grid = torch.cat([grid_x, grid_y], 3)
    return grid
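For reference, a small sketch of how the identity grid above combines with a pixel-offset flow field before F.grid_sample, mirroring the normalization used in ConditionGenerator.forward (assumptions: zero flow, CPU, and align_corners=True so the identity holds exactly; the model code leaves that flag at the framework default):

from types import SimpleNamespace

opt = SimpleNamespace(cuda=False)
N, C, iH, iW = 1, 3, 8, 6
img = torch.randn(N, C, iH, iW)
flow = torch.zeros(N, iH, iW, 2)  # zero offsets -> identity warp

# scale pixel offsets into grid_sample's [-1, 1] coordinate range
flow_norm = torch.cat([flow[..., 0:1] / ((iW - 1.0) / 2.0),
                       flow[..., 1:2] / ((iH - 1.0) / 2.0)], 3)
warped = F.grid_sample(img, make_grid(N, iH, iW, opt) + flow_norm,
                       padding_mode='border', align_corners=True)
assert torch.allclose(warped, img, atol=1e-5)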
class ResBlock(nn.Module):
    def __init__(self, in_nc, out_nc, scale='down', norm_layer=nn.BatchNorm2d):
        super(ResBlock, self).__init__()
        use_bias = norm_layer == nn.InstanceNorm2d
        assert scale in ['up', 'down', 'same'], "ResBlock scale must be in 'up' 'down' 'same'"

        if scale == 'same':
            self.scale = nn.Conv2d(in_nc, out_nc, kernel_size=1, bias=True)
        if scale == 'up':
            self.scale = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear'),
                nn.Conv2d(in_nc, out_nc, kernel_size=1, bias=True)
            )
        if scale == 'down':
            self.scale = nn.Conv2d(in_nc, out_nc, kernel_size=3, stride=2, padding=1, bias=use_bias)

        self.block = nn.Sequential(
            nn.Conv2d(out_nc, out_nc, kernel_size=3, stride=1, padding=1, bias=use_bias),
            norm_layer(out_nc),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_nc, out_nc, kernel_size=3, stride=1, padding=1, bias=use_bias),
            norm_layer(out_nc)
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        residual = self.scale(x)
        return self.relu(residual + self.block(residual))


class Vgg19(nn.Module):
    def __init__(self, requires_grad=False):
        super(Vgg19, self).__init__()
        vgg_pretrained_features = models.vgg19(pretrained=True).features
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        for x in range(2):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(2, 7):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(7, 12):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(12, 21):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        for x in range(21, 30):
            self.slice5.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X):
        h_relu1 = self.slice1(X)
        h_relu2 = self.slice2(h_relu1)
        h_relu3 = self.slice3(h_relu2)
        h_relu4 = self.slice4(h_relu3)
        h_relu5 = self.slice5(h_relu4)
        out = [h_relu1, h_relu2, h_relu3, h_relu4, h_relu5]
        return out


class VGGLoss(nn.Module):
    def __init__(self, opt, layids=None):
        super(VGGLoss, self).__init__()
        self.vgg = Vgg19()
        if opt.cuda:
            self.vgg.cuda()
        self.criterion = nn.L1Loss()
        self.weights = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0]
        self.layids = layids

    def forward(self, x, y):
        x_vgg, y_vgg = self.vgg(x), self.vgg(y)
        loss = 0
        if self.layids is None:
            self.layids = list(range(len(x_vgg)))
        for i in self.layids:
            loss += self.weights[i] * self.criterion(x_vgg[i], y_vgg[i].detach())
        return loss
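A minimal sketch of the perceptual loss in isolation (CPU, random images; downloading the ImageNet VGG-19 weights is assumed to succeed):

from types import SimpleNamespace

criterion_vgg = VGGLoss(SimpleNamespace(cuda=False))
fake = torch.rand(2, 3, 256, 192)
real = torch.rand(2, 3, 256, 192)
loss = criterion_vgg(fake, real)  # scalar: weighted L1 over the 5 relu feature maps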
# Defines the GAN loss which uses either LSGAN or the regular GAN.
# When LSGAN is used, it is basically the same as MSELoss,
# but it abstracts away the need to create a target label tensor
# of the same size as the input.

class GANLoss(nn.Module):
    def __init__(self, use_lsgan=True, target_real_label=1.0, target_fake_label=0.0,
                 tensor=torch.FloatTensor):
        super(GANLoss, self).__init__()
        self.real_label = target_real_label
        self.fake_label = target_fake_label
        self.real_label_var = None
        self.fake_label_var = None
        self.Tensor = tensor
        if use_lsgan:
            self.loss = nn.MSELoss()
        else:
            self.loss = nn.BCELoss()

    def get_target_tensor(self, input, target_is_real):
        if target_is_real:
            create_label = ((self.real_label_var is None) or
                            (self.real_label_var.numel() != input.numel()))
            if create_label:
                real_tensor = self.Tensor(input.size()).fill_(self.real_label)
                self.real_label_var = Variable(real_tensor, requires_grad=False)
            target_tensor = self.real_label_var
        else:
            create_label = ((self.fake_label_var is None) or
                            (self.fake_label_var.numel() != input.numel()))
            if create_label:
                fake_tensor = self.Tensor(input.size()).fill_(self.fake_label)
                self.fake_label_var = Variable(fake_tensor, requires_grad=False)
            target_tensor = self.fake_label_var
        return target_tensor

    def __call__(self, input, target_is_real):
        if isinstance(input[0], list):
            loss = 0
            for input_i in input:
                pred = input_i[-1]
                target_tensor = self.get_target_tensor(pred, target_is_real)
                loss += self.loss(pred, target_tensor)
            return loss
        else:
            target_tensor = self.get_target_tensor(input[-1], target_is_real)
            return self.loss(input[-1], target_tensor)


class MultiscaleDiscriminator(nn.Module):
    def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d,
                 use_sigmoid=False, num_D=3, getIntermFeat=False, Ddownx2=False, Ddropout=False, spectral=False):
        super(MultiscaleDiscriminator, self).__init__()
        self.num_D = num_D
        self.n_layers = n_layers
        self.getIntermFeat = getIntermFeat
        self.Ddownx2 = Ddownx2

        for i in range(num_D):
            netD = NLayerDiscriminator(input_nc, ndf, n_layers, norm_layer, use_sigmoid, getIntermFeat, Ddropout, spectral=spectral)
            if getIntermFeat:
                for j in range(n_layers + 2):
                    setattr(self, 'scale' + str(i) + '_layer' + str(j), getattr(netD, 'model' + str(j)))
            else:
                setattr(self, 'layer' + str(i), netD.model)

        self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False)

    def singleD_forward(self, model, input):
        if self.getIntermFeat:
            result = [input]
            for i in range(len(model)):
                result.append(model[i](result[-1]))
            return result[1:]
        else:
            return [model(input)]

    def forward(self, input):
        num_D = self.num_D

        result = []
        if self.Ddownx2:
            input_downsampled = self.downsample(input)
        else:
            input_downsampled = input
        for i in range(num_D):
            if self.getIntermFeat:
                model = [getattr(self, 'scale' + str(num_D - 1 - i) + '_layer' + str(j)) for j in
                         range(self.n_layers + 2)]
            else:
                model = getattr(self, 'layer' + str(num_D - 1 - i))
            result.append(self.singleD_forward(model, input_downsampled))
            if i != (num_D - 1):
                input_downsampled = self.downsample(input_downsampled)
        return result


class NLayerDiscriminator(nn.Module):
    def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False, getIntermFeat=False, Ddropout=False, spectral=False):
        super(NLayerDiscriminator, self).__init__()
        self.getIntermFeat = getIntermFeat
        self.n_layers = n_layers
        self.spectral_norm = spectral_norm if spectral else lambda x: x

        kw = 4
        padw = int(np.ceil((kw - 1.0) / 2))
        sequence = [[nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]]

        nf = ndf
        for n in range(1, n_layers):
            nf_prev = nf
            nf = min(nf * 2, 512)
            if Ddropout:
                sequence += [[
                    self.spectral_norm(nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=padw)),
                    norm_layer(nf), nn.LeakyReLU(0.2, True), nn.Dropout(0.5)
                ]]
            else:
                sequence += [[
                    self.spectral_norm(nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=padw)),
                    norm_layer(nf), nn.LeakyReLU(0.2, True)
                ]]

        nf_prev = nf
        nf = min(nf * 2, 512)
        sequence += [[
            nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw),
            norm_layer(nf),
            nn.LeakyReLU(0.2, True)
        ]]

        sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]]

        if use_sigmoid:
            sequence += [[nn.Sigmoid()]]

        if getIntermFeat:
            for n in range(len(sequence)):
                setattr(self, 'model' + str(n), nn.Sequential(*sequence[n]))
        else:
            sequence_stream = []
            for n in range(len(sequence)):
                sequence_stream += sequence[n]
            self.model = nn.Sequential(*sequence_stream)

    def forward(self, input):
        if self.getIntermFeat:
            res = [input]
            for n in range(self.n_layers + 2):
                model = getattr(self, 'model' + str(n))
                res.append(model(res[-1]))
            return res[1:]
        else:
            return self.model(input)
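A small sketch tying the two pieces together, multi-scale discriminator outputs fed to GANLoss (shapes and num_D chosen here purely for illustration):

netD = MultiscaleDiscriminator(input_nc=3, ndf=64, n_layers=3, num_D=2, getIntermFeat=True)
criterion_gan = GANLoss(use_lsgan=True)

pred_fake = netD(torch.randn(2, 3, 256, 192))   # one list of feature maps per scale
loss_d_fake = criterion_gan(pred_fake, False)   # scores only the final map of each scale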
def save_checkpoint(model, save_path, opt):
    if not os.path.exists(os.path.dirname(save_path)):
        os.makedirs(os.path.dirname(save_path))

    torch.save(model.cpu().state_dict(), save_path)
    if opt.cuda:
        model.cuda()


def load_checkpoint(model, checkpoint_path, opt):
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError('no checkpoint at %s' % checkpoint_path)
    model.load_state_dict(torch.load(checkpoint_path), strict=False)
    if opt.cuda:
        model.cuda()


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv2d') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm2d') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


def get_norm_layer(norm_type='instance'):
    if norm_type == 'batch':
        norm_layer = functools.partial(nn.BatchNorm2d, affine=True)
    elif norm_type == 'instance':
        norm_layer = functools.partial(nn.InstanceNorm2d, affine=False)
    else:
        raise NotImplementedError('normalization layer [%s] is not found' % norm_type)
    return norm_layer


def define_D(input_nc, ndf=64, n_layers_D=3, norm='instance', use_sigmoid=False, num_D=2, getIntermFeat=False, gpu_ids=[], Ddownx2=False, Ddropout=False, spectral=False):
    norm_layer = get_norm_layer(norm_type=norm)
    netD = MultiscaleDiscriminator(input_nc, ndf, n_layers_D, norm_layer, use_sigmoid, num_D, getIntermFeat, Ddownx2, Ddropout, spectral=spectral)
    print(netD)
    if len(gpu_ids) > 0:
        assert (torch.cuda.is_available())
        netD.cuda()
    netD.apply(weights_init)
    return netD
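A round-trip sketch of the checkpoint helpers (the temp path and CPU-only opt are illustrative):

from types import SimpleNamespace

opt = SimpleNamespace(cuda=False)
net = ResBlock(3, 8, scale='same')
save_checkpoint(net, './checkpoints/resblock_demo.pth', opt)
load_checkpoint(net, './checkpoints/resblock_demo.pth', opt)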
requirements.txt
ADDED
@@ -0,0 +1,12 @@
torch
torchvision
torchaudio
opencv-python
torchgeometry
Pillow
tqdm
tensorboardX
scikit-image
scipy
streamlit-image-select
pandas
test_generator.py
ADDED
@@ -0,0 +1,278 @@
import torch
import torch.nn as nn

from torchvision.utils import make_grid as make_image_grid
from torchvision.utils import save_image
import argparse
import os
import time
from cp_dataset_test import CPDatasetTest, CPDataLoader

from networks import ConditionGenerator, load_checkpoint, make_grid
from network_generator import SPADEGenerator
from tensorboardX import SummaryWriter
from utils import *

import torchgeometry as tgm
from collections import OrderedDict


def remove_overlap(seg_out, warped_cm):
    assert len(warped_cm.shape) == 4
    # zero out warped cloth-mask pixels claimed by non-cloth body channels
    warped_cm = warped_cm - (torch.cat([seg_out[:, 1:3, :, :], seg_out[:, 5:, :, :]], dim=1)).sum(dim=1, keepdim=True) * warped_cm
    return warped_cm
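A tiny sketch of remove_overlap on dummy tensors (random softmax map, illustrative shapes):

seg = torch.softmax(torch.randn(1, 13, 32, 24), dim=1)  # fake parse probabilities
cm = torch.ones(1, 1, 32, 24)                           # fully-on warped cloth mask
cm_clean = remove_overlap(seg, cm)                      # shrinks where body channels dominate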
def get_opt():
    parser = argparse.ArgumentParser()

    parser.add_argument("--gpu_ids", default="")
    parser.add_argument('-j', '--workers', type=int, default=4)
    parser.add_argument('-b', '--batch-size', type=int, default=1)
    parser.add_argument('--fp16', action='store_true', help='use amp')
    # Cuda availability
    parser.add_argument('--cuda', default=False, help='cuda or cpu')

    parser.add_argument('--test_name', type=str, default='test', help='test name')
    parser.add_argument("--dataroot", default="./data/zalando-hd-resize")
    parser.add_argument("--datamode", default="test")
    parser.add_argument("--data_list", default="test_pairs.txt")
    parser.add_argument("--output_dir", type=str, default="./Output")
    parser.add_argument("--datasetting", default="unpaired")
    parser.add_argument("--fine_width", type=int, default=768)
    parser.add_argument("--fine_height", type=int, default=1024)

    parser.add_argument('--tensorboard_dir', type=str, default='./data/zalando-hd-resize/tensorboard', help='save tensorboard infos')
    parser.add_argument('--checkpoint_dir', type=str, default='checkpoints', help='save checkpoint infos')
    parser.add_argument('--tocg_checkpoint', type=str, default='./eval_models/weights/v0.1/mtviton.pth', help='tocg checkpoint')
    parser.add_argument('--gen_checkpoint', type=str, default='./eval_models/weights/v0.1/gen.pth', help='G checkpoint')

    parser.add_argument("--tensorboard_count", type=int, default=100)
    parser.add_argument("--shuffle", action='store_true', help='shuffle input data')
    parser.add_argument("--semantic_nc", type=int, default=13)
    parser.add_argument("--output_nc", type=int, default=13)
    parser.add_argument('--gen_semantic_nc', type=int, default=7, help='# of input label classes without unknown class')

    # network
    parser.add_argument("--warp_feature", choices=['encoder', 'T1'], default="T1")
    parser.add_argument("--out_layer", choices=['relu', 'conv'], default="relu")

    # training
    parser.add_argument("--clothmask_composition", type=str, choices=['no_composition', 'detach', 'warp_grad'], default='warp_grad')

    # Hyper-parameters
    parser.add_argument('--upsample', type=str, default='bilinear', choices=['nearest', 'bilinear'])
    parser.add_argument('--occlusion', action='store_true', help="Occlusion handling")

    # generator
    parser.add_argument('--norm_G', type=str, default='spectralaliasinstance', help='instance normalization or batch normalization')
    parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in first conv layer')
    parser.add_argument('--init_type', type=str, default='xavier', help='network initialization [normal|xavier|kaiming|orthogonal]')
    parser.add_argument('--init_variance', type=float, default=0.02, help='variance of the initialization distribution')
    parser.add_argument('--num_upsampling_layers', choices=('normal', 'more', 'most'), default='most',  # normal: 256, more: 512
                        help="If 'more', adds an upsampling layer between the two middle resnet blocks. If 'most', also adds one more upsampling + resnet layer at the end of the generator")

    opt = parser.parse_args()
    return opt
def load_checkpoint_G(model, checkpoint_path, opt):
    if not os.path.exists(checkpoint_path):
        print("Invalid path!")
        return
    state_dict = torch.load(checkpoint_path)
    # rename legacy 'ace'/'Spade' keys to the current 'alias' module names
    new_state_dict = OrderedDict([(k.replace('ace', 'alias').replace('.Spade', ''), v) for (k, v) in state_dict.items()])
    new_state_dict._metadata = OrderedDict([(k.replace('ace', 'alias').replace('.Spade', ''), v) for (k, v) in state_dict._metadata.items()])
    model.load_state_dict(new_state_dict, strict=True)
    if opt.cuda:
        model.cuda()
def test(opt, test_loader, tocg, generator):
    gauss = tgm.image.GaussianBlur((15, 15), (3, 3))
    if opt.cuda:
        gauss = gauss.cuda()

    # Model
    if opt.cuda:
        tocg.cuda()
    tocg.eval()
    generator.eval()

    if opt.output_dir is not None:
        output_dir = opt.output_dir
    else:
        output_dir = os.path.join('./output', opt.test_name,
                                  opt.datamode, opt.datasetting, 'generator', 'output')
    grid_dir = os.path.join('./output', opt.test_name,
                            opt.datamode, opt.datasetting, 'generator', 'grid')

    os.makedirs(grid_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    num = 0
    iter_start_time = time.time()
    with torch.no_grad():
        for inputs in test_loader.data_loader:

            if opt.cuda:
                pose_map = inputs['pose'].cuda()
                pre_clothes_mask = inputs['cloth_mask'][opt.datasetting].cuda()
                label = inputs['parse']
                parse_agnostic = inputs['parse_agnostic']
                agnostic = inputs['agnostic'].cuda()
                clothes = inputs['cloth'][opt.datasetting].cuda()  # target cloth
                densepose = inputs['densepose'].cuda()
                im = inputs['image']
                input_label, input_parse_agnostic = label.cuda(), parse_agnostic.cuda()
                pre_clothes_mask = torch.FloatTensor((pre_clothes_mask.detach().cpu().numpy() > 0.5).astype(np.float32)).cuda()
            else:
                pose_map = inputs['pose']
                pre_clothes_mask = inputs['cloth_mask'][opt.datasetting]
                label = inputs['parse']
                parse_agnostic = inputs['parse_agnostic']
                agnostic = inputs['agnostic']
                clothes = inputs['cloth'][opt.datasetting]  # target cloth
                densepose = inputs['densepose']
                im = inputs['image']
                input_label, input_parse_agnostic = label, parse_agnostic
                pre_clothes_mask = torch.FloatTensor((pre_clothes_mask.detach().cpu().numpy() > 0.5).astype(np.float32))

            # downsample to the 256 x 192 resolution the condition generator works at
            pose_map_down = F.interpolate(pose_map, size=(256, 192), mode='bilinear')
            pre_clothes_mask_down = F.interpolate(pre_clothes_mask, size=(256, 192), mode='nearest')
            input_label_down = F.interpolate(input_label, size=(256, 192), mode='bilinear')
            input_parse_agnostic_down = F.interpolate(input_parse_agnostic, size=(256, 192), mode='nearest')
            agnostic_down = F.interpolate(agnostic, size=(256, 192), mode='nearest')
            clothes_down = F.interpolate(clothes, size=(256, 192), mode='bilinear')
            densepose_down = F.interpolate(densepose, size=(256, 192), mode='bilinear')

            shape = pre_clothes_mask.shape

            # multi-task inputs
            input1 = torch.cat([clothes_down, pre_clothes_mask_down], 1)
            input2 = torch.cat([input_parse_agnostic_down, densepose_down], 1)

            # forward
            flow_list, fake_segmap, warped_cloth_paired, warped_clothmask_paired = tocg(opt, input1, input2)

            # warped cloth mask one hot
            if opt.cuda:
                warped_cm_onehot = torch.FloatTensor((warped_clothmask_paired.detach().cpu().numpy() > 0.5).astype(np.float32)).cuda()
            else:
                warped_cm_onehot = torch.FloatTensor((warped_clothmask_paired.detach().cpu().numpy() > 0.5).astype(np.float32))

            if opt.clothmask_composition != 'no_composition':
                if opt.clothmask_composition == 'detach':
                    cloth_mask = torch.ones_like(fake_segmap)
                    cloth_mask[:, 3:4, :, :] = warped_cm_onehot
                    fake_segmap = fake_segmap * cloth_mask

                if opt.clothmask_composition == 'warp_grad':
                    cloth_mask = torch.ones_like(fake_segmap)
                    cloth_mask[:, 3:4, :, :] = warped_clothmask_paired
                    fake_segmap = fake_segmap * cloth_mask

            # make generator input parse map
            fake_parse_gauss = gauss(F.interpolate(fake_segmap, size=(opt.fine_height, opt.fine_width), mode='bilinear'))
            fake_parse = fake_parse_gauss.argmax(dim=1)[:, None]

            if opt.cuda:
                old_parse = torch.FloatTensor(fake_parse.size(0), 13, opt.fine_height, opt.fine_width).zero_().cuda()
            else:
                old_parse = torch.FloatTensor(fake_parse.size(0), 13, opt.fine_height, opt.fine_width).zero_()
            old_parse.scatter_(1, fake_parse, 1.0)

            labels = {
                0: ['background', [0]],
                1: ['paste', [2, 4, 7, 8, 9, 10, 11]],
                2: ['upper', [3]],
                3: ['hair', [1]],
                4: ['left_arm', [5]],
                5: ['right_arm', [6]],
                6: ['noise', [12]]
            }
            if opt.cuda:
                parse = torch.FloatTensor(fake_parse.size(0), 7, opt.fine_height, opt.fine_width).zero_().cuda()
            else:
                parse = torch.FloatTensor(fake_parse.size(0), 7, opt.fine_height, opt.fine_width).zero_()
            for i in range(len(labels)):
                for label in labels[i][1]:
                    parse[:, i] += old_parse[:, label]

            # warped cloth
            N, _, iH, iW = clothes.shape
            flow = F.interpolate(flow_list[-1].permute(0, 3, 1, 2), size=(iH, iW), mode='bilinear').permute(0, 2, 3, 1)
            flow_norm = torch.cat([flow[:, :, :, 0:1] / ((96 - 1.0) / 2.0), flow[:, :, :, 1:2] / ((128 - 1.0) / 2.0)], 3)

            grid = make_grid(N, iH, iW, opt)
            warped_grid = grid + flow_norm
            warped_cloth = F.grid_sample(clothes, warped_grid, padding_mode='border')
            warped_clothmask = F.grid_sample(pre_clothes_mask, warped_grid, padding_mode='border')
            if opt.occlusion:
                warped_clothmask = remove_overlap(F.softmax(fake_parse_gauss, dim=1), warped_clothmask)
                warped_cloth = warped_cloth * warped_clothmask + torch.ones_like(warped_cloth) * (1 - warped_clothmask)

            output = generator(torch.cat((agnostic, densepose, warped_cloth), dim=1), parse)

            # visualize
            unpaired_names = []
            for i in range(shape[0]):
                grid = make_image_grid([(clothes[i].cpu() / 2 + 0.5), (pre_clothes_mask[i].cpu()).expand(3, -1, -1), visualize_segmap(parse_agnostic.cpu(), batch=i), ((densepose.cpu()[i] + 1) / 2),
                                        (warped_cloth[i].cpu().detach() / 2 + 0.5), (warped_clothmask[i].cpu().detach()).expand(3, -1, -1), visualize_segmap(fake_parse_gauss.cpu(), batch=i),
                                        (pose_map[i].cpu() / 2 + 0.5), (warped_cloth[i].cpu() / 2 + 0.5), (agnostic[i].cpu() / 2 + 0.5),
                                        (im[i] / 2 + 0.5), (output[i].cpu() / 2 + 0.5)],
                                       nrow=4)
                unpaired_name = (inputs['c_name']['paired'][i].split('.')[0] + '_' + inputs['c_name'][opt.datasetting][i].split('.')[0] + '.png')
                save_image(grid, os.path.join(grid_dir, unpaired_name))
                unpaired_names.append(unpaired_name)

            # save output
            save_images(output, unpaired_names, output_dir)

            num += shape[0]
            print(num)

    print(f"Test time {time.time() - iter_start_time}")
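A note on the constants above, as a sanity check: the last entry in flow_list lives on the 128 x 96 grid (half of 256 x 192), so even after resizing it to the full iH x iW the offsets are still measured in 96-wide / 128-high pixel units, hence the (96 - 1)/2 and (128 - 1)/2 normalizers (illustrative values below):

flow_px = torch.full((1, 128, 96, 2), 4.0)   # 4-pixel offsets on the coarse grid
flow_up = F.interpolate(flow_px.permute(0, 3, 1, 2), size=(1024, 768),
                        mode='bilinear').permute(0, 2, 3, 1)
fx = flow_up[..., 0] / ((96 - 1.0) / 2.0)    # about 0.084 in grid_sample units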
def main():
    opt = get_opt()
    print(opt)
    print("Start to test %s!" % opt.test_name)
    os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu_ids

    # create test dataset & loader
    test_dataset = CPDatasetTest(opt)
    test_loader = CPDataLoader(opt, test_dataset)

    # visualization
    # if not os.path.exists(opt.tensorboard_dir):
    #     os.makedirs(opt.tensorboard_dir)
    # board = SummaryWriter(log_dir=os.path.join(opt.tensorboard_dir, opt.test_name, opt.datamode, opt.datasetting))

    ## Model
    # tocg
    input1_nc = 4  # cloth + cloth-mask
    input2_nc = opt.semantic_nc + 3  # parse_agnostic + densepose
    tocg = ConditionGenerator(opt, input1_nc=input1_nc, input2_nc=input2_nc, output_nc=opt.output_nc, ngf=96, norm_layer=nn.BatchNorm2d)

    # generator
    opt.semantic_nc = 7
    generator = SPADEGenerator(opt, 3 + 3 + 3)
    generator.print_network()

    # Load Checkpoint
    load_checkpoint(tocg, opt.tocg_checkpoint, opt)
    load_checkpoint_G(generator, opt.gen_checkpoint, opt)

    # Test
    test(opt, test_loader, tocg, generator)

    print("Finished testing!")


if __name__ == "__main__":
    main()
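An illustrative programmatic invocation, assuming the dataset and checkpoints exist at the default paths declared in get_opt():

import sys

sys.argv = ['test_generator.py', '--test_name', 'demo', '--occlusion']
main()  # everything else falls back to the defaults in get_opt()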
utils.py
ADDED
@@ -0,0 +1,119 @@
import torch
from torchvision import transforms
from PIL import Image
import torch.nn.functional as F
import numpy as np
import cv2
import os


def get_clothes_mask(old_label):
    clothes = torch.FloatTensor((old_label.cpu().numpy() == 3).astype(np.int64))
    return clothes


def changearm(old_label):
    # relabel both arms (5, 6) as upper clothes (3)
    label = old_label
    arm1 = torch.FloatTensor((old_label.cpu().numpy() == 5).astype(np.int64))
    arm2 = torch.FloatTensor((old_label.cpu().numpy() == 6).astype(np.int64))
    label = label * (1 - arm1) + arm1 * 3
    label = label * (1 - arm2) + arm2 * 3
    return label


def gen_noise(shape):
    noise = np.zeros(shape, dtype=np.uint8)
    ### noise
    noise = cv2.randn(noise, 0, 255)
    noise = np.asarray(noise / 255, dtype=np.uint8)
    noise = torch.tensor(noise, dtype=torch.float32)
    return noise


def cross_entropy2d(input, target, weight=None, size_average=True):
    n, c, h, w = input.size()
    nt, ht, wt = target.size()

    # Handle inconsistent size between input and target
    if h != ht or w != wt:
        input = F.interpolate(input, size=(ht, wt), mode="bilinear", align_corners=True)

    input = input.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    target = target.view(-1)
    loss = F.cross_entropy(
        input, target, weight=weight, size_average=size_average, ignore_index=250
    )
    return loss


def ndim_tensor2im(image_tensor, imtype=np.uint8, batch=0):
    image_numpy = image_tensor[batch].cpu().float().numpy()
    result = np.argmax(image_numpy, axis=0)
    return result.astype(imtype)


def visualize_segmap(input, multi_channel=True, tensor_out=True, batch=0):
    palette = [
        0, 0, 0, 128, 0, 0, 254, 0, 0, 0, 85, 0, 169, 0, 51,
        254, 85, 0, 0, 0, 85, 0, 119, 220, 85, 85, 0, 0, 85, 85,
        85, 51, 0, 52, 86, 128, 0, 128, 0, 0, 0, 254, 51, 169, 220,
        0, 254, 254, 85, 254, 169, 169, 254, 85, 254, 254, 0, 254, 169, 0
    ]
    input = input.detach()
    if multi_channel:
        input = ndim_tensor2im(input, batch=batch)
    else:
        input = input[batch][0].cpu()
    input = np.asarray(input)
    input = input.astype(np.uint8)
    input = Image.fromarray(input, 'P')
    input.putpalette(palette)

    if tensor_out:
        trans = transforms.ToTensor()
        return trans(input.convert('RGB'))

    return input


def pred_to_onehot(prediction):
    size = prediction.shape
    prediction_max = torch.argmax(prediction, dim=1)
    oneHot_size = (size[0], 13, size[2], size[3])
    pred_onehot = torch.FloatTensor(torch.Size(oneHot_size)).zero_()
    pred_onehot = pred_onehot.scatter_(1, prediction_max.unsqueeze(1).data.long(), 1.0)
    return pred_onehot


def cal_miou(prediction, target):
    size = prediction.shape
    target = target.cpu()
    prediction = pred_to_onehot(prediction.detach().cpu())
    class_ids = [1, 2, 3, 4, 5, 6, 7, 8]  # avoids shadowing the built-in `list`
    union = 0
    intersection = 0
    for b in range(size[0]):
        for c in class_ids:
            intersection += torch.logical_and(target[b, c], prediction[b, c]).sum()
            union += torch.logical_or(target[b, c], prediction[b, c]).sum()
    return intersection.item() / union.item()


def save_images(img_tensors, img_names, save_dir):
    for img_tensor, img_name in zip(img_tensors, img_names):
        tensor = (img_tensor.clone() + 1) * 0.5 * 255
        tensor = tensor.cpu().clamp(0, 255)

        try:
            array = tensor.numpy().astype('uint8')
        except RuntimeError:
            array = tensor.detach().numpy().astype('uint8')

        if array.shape[0] == 1:
            array = array.squeeze(0)
        elif array.shape[0] == 3:
            array = array.swapaxes(0, 1).swapaxes(1, 2)

        im = Image.fromarray(array)
        im.save(os.path.join(save_dir, img_name), format='JPEG')


def create_network(cls, opt):
    net = cls(opt)
    net.print_network()
    if len(opt.gpu_ids) > 0:
        assert (torch.cuda.is_available())
        net.cuda()
    net.init_weights(opt.init_type, opt.init_variance)
    return net
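A quick sketch of the two visualization helpers on a dummy 13-channel parse map (shapes and output directory are illustrative):

seg = torch.randn(1, 13, 64, 48)         # fake segmentation logits
rgb = visualize_segmap(seg, batch=0)     # (3, 64, 48) palette-colored tensor

os.makedirs('demo_out', exist_ok=True)
imgs = torch.rand(2, 3, 64, 48) * 2 - 1  # [-1, 1] range, as the generator outputs
save_images(imgs, ['a.png', 'b.png'], 'demo_out')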