Upload folder using huggingface_hub
- DTGM_model_167500.pt +3 -0
- eval.py +178 -0
- gdtls.py +505 -0
- models.py +273 -0
DTGM_model_167500.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ac52dc0c74c44bac9506bec31e9d94cadd40dd44921dea65401bb79d4b3af308
size 1250282226
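The checkpoint is stored as a Git LFS pointer: the repository tracks only the spec version, the SHA-256 object id, and the byte size (about 1.25 GB); the actual weights live in LFS storage. A minimal sketch of reading such a pointer (the parse_lfs_pointer helper is illustrative, not part of this repo):

# Hypothetical helper: parse a Git LFS pointer file into its key/value fields.
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:ac52dc0c74c44bac9506bec31e9d94cadd40dd44921dea65401bb79d4b3af308
size 1250282226"""
info = parse_lfs_pointer(pointer)
print(info["oid"], int(info["size"]))  # sha256:ac52... 1250282226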
eval.py
ADDED
@@ -0,0 +1,178 @@
import math
import torch.cuda
from util.models import *
from gdtls import DTLS, Trainer
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--device', default="cuda:0", type=str)
parser.add_argument('--hr_size', default=256, type=int, help="size of HR image")
parser.add_argument('--lr_size', default=2, type=int, help="size of LR image")
parser.add_argument('--interval_mode', default="fibonacci", type=str, help="linear; exp; fibonacci")
parser.add_argument('--stride', default=2, type=int, help="size change between each step if linear mode is used")
parser.add_argument('--train_steps', default=200001, type=int)
parser.add_argument('--lr_rate', default=2e-5, type=float, help="learning rate")
parser.add_argument('--sample_every_iterations', default=5000, type=int, help="sample SR images every N iterations")
parser.add_argument('--save_folder', default="DTGM_segnoise_4b_50k", type=str, help="folder to save your training or evaluation results")
parser.add_argument('--load_path', default="DTGM_segnoise_4b/DTGM_model_165000.pt", type=str, help="None or directory to pretrained model")
parser.add_argument('--data_path', default='/hdda/Datasets/Face_super_resolution/images1024x1024/', type=str, help="directory to your training dataset")

parser.add_argument('--batch_size', default=1, type=int)

args = parser.parse_args()
device = args.device if torch.cuda.is_available() else "cpu"
size_list = [256, 64, 32, 16, 8, 4, 3, 2]
timestep = len(size_list) - 1

print(f"Total steps for {args.lr_size} to {args.hr_size}: {timestep}")

model = UNet().to(device)
discriminator = Discriminator().to(device)

dtls = DTLS(
    model,
    image_size = args.hr_size,
    stride = args.stride,
    size_list = size_list,
    timesteps = timestep,       # number of steps
    device = device,
).to(device)


trainer = Trainer(
    dtls,
    discriminator,
    args.data_path,
    image_size = args.hr_size,
    train_batch_size = args.batch_size,
    train_num_steps = args.train_steps,     # total training steps
    ema_decay = 0.995,                      # exponential moving average decay
    results_folder = args.save_folder,
    load_path = args.load_path,
    device = device,
    eval_mode = True,
    save_and_sample_every = args.sample_every_iterations
)

if __name__ == "__main__":
    trainer.evaluation()
    trainer.fid(created_dataset=args.save_folder)


# import copy
# from pathlib import Path
# import torch
# from util.models import *
# import argparse
# import random
# import torch.nn.functional as F
# from torchvision import utils
# import os
# import errno
#
# parser = argparse.ArgumentParser()
# parser.add_argument('--device', default="cuda:1", type=str)
# parser.add_argument('--hr_size', default=256, type=int, help="size of HR image")
# parser.add_argument('--lr_size', default=2, type=int, help="size of LR image")
# parser.add_argument('--num_sample', default=50000, type=int, help="number of images to generate")
# parser.add_argument('--save_folder', default="DTGM_Xeii_lpips_FM_ii_80kpt_50k", type=str, help="folder to save your training or evaluation results")
# parser.add_argument('--load_path', default="DTGM_Xeii_lpips_FM_ii/GDTLS_80000.pt", type=str, help="None or directory to pretrained model")
#
# def create_folder(path):
#     try:
#         os.mkdir(path)
#     except OSError as exc:
#         if exc.errno != errno.EEXIST:
#             raise
#         pass
#
# def transform_func_sample(img, target_size):
#     n = target_size
#     m = args.hr_size
#
#     if m / n > 16:
#         img_1 = F.interpolate(img, size=m // 4, mode='bicubic', antialias=True)
#         img_1 = F.interpolate(img_1, size=m // 8, mode='bicubic', antialias=True)
#         img_1 = F.interpolate(img_1, size=n, mode='bicubic', antialias=True)
#     else:
#         img_1 = F.interpolate(img, size=n, mode='bicubic', antialias=True)
#     img_1 = F.interpolate(img_1, size=m, mode='bicubic', antialias=True)
#
#     return img_1
#
# def transform_func_noise(img, device_, target_size, fixed_std=True):
#     n = target_size
#     m = args.hr_size
#
#     random_mean = torch.rand(1).add(-0.5).item()
#     if fixed_std:
#         random_std = 0.5
#     else:
#         random_std = torch.rand(1).mul(0.5).item()
#     decreasing_scale = 0.9 ** (n - 2)
#
#     if m / n > 16:
#         img_1 = F.interpolate(img, size=m // 4, mode='bicubic', antialias=True)
#         img_1 = F.interpolate(img_1, size=m // 8, mode='bicubic', antialias=True)
#         img_1 = F.interpolate(img_1, size=n, mode='bicubic', antialias=True)
#     else:
#         img_1 = F.interpolate(img, size=n, mode='bicubic', antialias=True)
#
#     # noise = torch.normal(mean=random_mean, std=random_std, size=(img_1.shape[0], 3, 2, 2)).to(self.device)
#     # noise = F.interpolate(noise, size=n, mode='bicubic', antialias=True)
#     noise = torch.normal(mean=random_mean, std=random_std, size=img_1.shape).to(device_)
#
#     img_1 += noise * decreasing_scale
#     img_1 = F.interpolate(img_1, size=m, mode='bicubic', antialias=True)
#
#     noise_refinement = torch.normal(mean=0, std=0.05, size=img_1.shape).to(device_)
#     return img_1 + noise_refinement
#
# def random_vector(batch_size):
#     mean = random.uniform(-0.5, 0.5)
#     std = random.uniform(0.1, 0.3)
#     vector = torch.normal(mean=mean, std=std, size=(batch_size, 1, 2, 2))
#     for colors in range(2):
#         mean = random.uniform(-0.5, 0.5)
#         std = random.uniform(0.1, 0.3)
#         rgb = torch.normal(mean=mean, std=std, size=(batch_size, 1, 2, 2))
#         vector = torch.cat((vector, rgb), dim=1)
#     return vector
#
# def sample(size_list_, device_, model_, batch_size=1, img=None, t=None):
#     blur_img = transform_func_sample(img.clone(), size_list_[t])
#     img_t = blur_img.clone()
#
#     ####### Domain Transfer
#     while t:
#         next_step = size_list_[t - 1]
#         step = torch.full((batch_size,), t, dtype=torch.long).to(device_)
#         R_x = model_(img_t, step)
#         if t == 1:
#             return R_x
#         else:
#             img_t = transform_func_noise(R_x, device_, next_step, fixed_std=True)
#             t -= 1
#     return img_t
#
# if __name__ == "__main__":
#     args = parser.parse_args()
#     results_folder = Path(args.save_folder)
#     results_folder.mkdir(exist_ok=True)
#     device = args.device if torch.cuda.is_available() else "cpu"
#     size_list = [256, 64, 32, 16, 8, 6, 4, 3, 2]
#     timestep = len(size_list) - 1
#     print(f"Total steps for {args.lr_size} to {args.hr_size}: {timestep}")
#     model = UNet().to(device)
#
#     if args.load_path is not None:
#         data = torch.load(args.load_path, map_location=device)
#         model.load_state_dict(data['ema'], strict=False)
#
#     for i in range(args.num_sample):
#         input_vector = random_vector(1).to(device)
#         sample_hr = sample(size_list, device, model, batch_size=1, img=input_vector, t=timestep)
#         utils.save_image(sample_hr.add(1).mul(0.5), f"{results_folder}/result_{i}.png", nrow=1)
#         print("saving ", i)
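Note that eval.py accepts --interval_mode (linear; exp; fibonacci) and --stride but then hardcodes size_list = [256, 64, 32, 16, 8, 4, 3, 2]. A minimal sketch of how such a descending size schedule could be built for each mode, assuming it runs from hr_size down to lr_size (the build_size_list helper is an illustration, not the repo's actual builder):

# Illustrative sketch only: build a descending size schedule from hr_size to lr_size.
# The three modes are assumptions based on the flag's help text; the repo hardcodes its list.
def build_size_list(hr_size, lr_size, mode="linear", stride=2):
    if mode == "linear":                     # hr, hr-stride, ..., lr
        sizes = list(range(hr_size, lr_size - 1, -stride))
    elif mode == "exp":                      # hr, hr/2, hr/4, ..., lr
        sizes, s = [], hr_size
        while s >= lr_size:
            sizes.append(s)
            s //= 2
    elif mode == "fibonacci":                # fibonacci sizes between lr and hr, descending
        fib = [1, 2]
        while fib[-1] < hr_size:
            fib.append(fib[-1] + fib[-2])
        sizes = [f for f in reversed(fib) if lr_size <= f <= hr_size]
    else:
        raise ValueError(mode)
    if sizes[-1] != lr_size:
        sizes.append(lr_size)
    return sizes

print(build_size_list(256, 2, mode="exp"))   # [256, 128, 64, 32, 16, 8, 4, 2]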
gdtls.py
ADDED
@@ -0,0 +1,505 @@
import copy
import torch.nn.functional as F
import numpy as np
import os
import errno
import torch
import shutil
import wandb
import random
import time
import lpips

from torch import nn
from torch.utils import data
from pathlib import Path
from torch.optim import Adam, AdamW
from torchvision import transforms, utils
from torchvision.transforms import InterpolationMode
from torchvision.transforms.v2 import RandomResize
from PIL import Image
from util.fid_score import calculate_fid_given_paths

try:
    from apex import amp
    APEX_AVAILABLE = True
except ImportError:
    APEX_AVAILABLE = False

####### helper functions

def create_folder(path):
    try:
        os.mkdir(path)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise

def del_folder(path):
    try:
        shutil.rmtree(path)
    except OSError:
        pass

def cycle(dl):
    while True:
        for data in dl:
            yield data

def num_to_groups(num, divisor):
    groups = num // divisor
    remainder = num % divisor
    arr = [divisor] * groups
    if remainder > 0:
        arr.append(remainder)
    return arr

def loss_backwards(fp16, loss, optimizer, **kwargs):
    if fp16:
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward(**kwargs)
    else:
        loss.backward(**kwargs)

# small helper modules
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)  # np.int was removed in NumPy >= 1.24; plain int is equivalent here
    cut_h = int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2


def Huber(input, target, delta=0.1, reduce=True):
    abs_error = torch.abs(input - target)
    quadratic = torch.clamp(abs_error, max=delta)

    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already a
    # nonzero contribution to the gradient from the quadratic term.
    linear = (abs_error - quadratic)
    losses = 0.5 * torch.pow(quadratic, 2) + delta * linear

    if reduce:
        return torch.mean(losses)
    else:
        return losses

class EMA():
    def __init__(self, beta):
        super().__init__()
        self.beta = beta

    def update_model_average(self, ma_model, current_model):
        for current_params, ma_params in zip(current_model.parameters(), ma_model.parameters()):
            old_weight, up_weight = ma_params.data, current_params.data
            ma_params.data = self.update_average(old_weight, up_weight)

    def update_average(self, old, new):
        if old is None:
            return new
        return old * self.beta + (1 - self.beta) * new


class DTLS(nn.Module):
    def __init__(
        self,
        model,
        *,
        image_size,
        size_list,
        stride,
        timesteps,
        device,
        stochastic=False,
    ):
        super().__init__()
        self.image_size = image_size
        self.UNet = model

        self.num_timesteps = int(timesteps)
        self.size_list = size_list
        self.stride = stride
        self.device = device
        self.MSE_loss = nn.MSELoss()
        # self.vgg_loss = Vgg19()
        self.lpips_loss = lpips.LPIPS(net='alex')

    def transform_func_loss(self, img, target_size):
        n = target_size
        m = self.image_size

        if m / n > 16:
            img_1 = F.interpolate(img, size=m // 4, mode='bicubic', antialias=True)
            img_1 = F.interpolate(img_1, size=m // 8, mode='bicubic', antialias=True)
            img_1 = F.interpolate(img_1, size=n, mode='bicubic', antialias=True)
        else:
            img_1 = F.interpolate(img, size=n, mode='bicubic', antialias=True)

        return img_1

    def transform_func_sample(self, img, target_size):
        n = target_size
        m = self.image_size

        if m / n > 16:
            img_1 = F.interpolate(img, size=m // 4, mode='bicubic', antialias=True)
            img_1 = F.interpolate(img_1, size=m // 8, mode='bicubic', antialias=True)
            img_1 = F.interpolate(img_1, size=n, mode='bicubic', antialias=True)
        else:
            img_1 = F.interpolate(img, size=n, mode='bicubic', antialias=True)
        img_1 = F.interpolate(img_1, size=m, mode='bicubic', antialias=True)

        return img_1

    def transform_func_noise(self, img, target_size, std_eval=False):
        n = target_size
        m = self.image_size

        random_mean = torch.rand(1).add(-.5).item()
        # random_std = torch.rand(1).mul(0.5).item()
        decreasing_scale = 0.9 ** (n - 2)

        if m / n > 16:
            img_1 = F.interpolate(img, size=m // 4, mode='bicubic', antialias=True)
            img_1 = F.interpolate(img_1, size=m // 8, mode='bicubic', antialias=True)
            img_1 = F.interpolate(img_1, size=n, mode='bicubic', antialias=True)
        else:
            img_1 = F.interpolate(img, size=n, mode='bicubic', antialias=True)

        noise = torch.normal(mean=random_mean, std=0.5, size=(img_1.shape[0], 3, 2, 2)).to(self.device)
        noise = F.interpolate(noise, size=n, mode='bicubic', antialias=True)
        img_1 += noise * decreasing_scale
        img_1 = F.interpolate(img_1, size=m, mode='bicubic', antialias=True)

        if n >= 16:
            noise_refinement = torch.normal(mean=0, std=1, size=img_1.shape).to(self.device)
            img_1 = img_1 + noise_refinement * decreasing_scale
        return img_1


    @torch.no_grad()
    def sample(self, batch_size=16, img=None, t=None, save_folder=None):
        if t is None:
            t = self.num_timesteps
        blur_img = self.transform_func_sample(img.clone(), self.size_list[t])
        img_t = blur_img.clone()
        ####### Domain Transfer
        while t:
            next_step = self.size_list[t - 1]
            step = torch.full((batch_size,), t, dtype=torch.long).to(self.device)
            R_x = self.UNet(img_t, step)
            if t == 1:
                return blur_img, R_x
            else:
                img_t = self.transform_func_noise(R_x, next_step)
                t -= 1
        return blur_img, img_t


    def p_losses(self, x_start, t):
        x_blur = x_start.clone()

        for i in range(t.shape[0]):
            current_step = self.size_list[t[i]]
            x_blur[i] = self.transform_func_noise(x_blur[i].unsqueeze(0), current_step)

        x_recon = self.UNet(x_blur, t)

        ### Pattern Domain Similarity Loss
        x_clone = x_recon.clone()
        for i in range(t.shape[0]):
            current_step = self.size_list[t[i]]
            x_clone[i] = self.transform_func_sample(x_recon[i].unsqueeze(0), current_step)

        ### Lowest Pattern Domain Similarity Loss
        # x_clone = x_recon.clone()
        # x_clone = self.transform_func_loss(x_clone, self.size_list[-1])
        # x_blur = self.transform_func_loss(x_blur, self.size_list[-1])

        loss = self.MSE_loss(x_clone, x_blur)
        # lpips_loss = self.lpips_loss(x_recon, x_start).mean()
        return loss, x_recon

    def forward(self, x, *args, **kwargs):
        b, c, h, w, device, img_size = *x.shape, x.device, self.image_size
        assert h == img_size and w == img_size, f'height and width of image must be {img_size}'
        t = torch.randint(1, self.num_timesteps + 1, (b,), device=device).long()
        return self.p_losses(x, t, *args, **kwargs)

# dataset classes

class Dataset(data.Dataset):
    def __init__(self, folder, image_size, exts=['jpg', 'jpeg', 'png']):
        super().__init__()
        self.folder = folder
        self.image_size = image_size
        self.paths = [p for ext in exts for p in Path(f'{folder}').glob(f'**/*.{ext}')]

        self.transform = transforms.Compose([
            RandomResize(int(image_size), int(image_size * 1.2), interpolation=InterpolationMode.BICUBIC, antialias=True),
            transforms.RandomCrop(image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Lambda(lambda t: (t * 2) - 1)
        ])

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        path = self.paths[index]
        img = Image.open(path).convert('RGB')  # force 3 channels; PNGs may carry an alpha channel
        return self.transform(img)

# trainer class

class Trainer(object):
    def __init__(
        self,
        diffusion_model,
        discriminator,
        folder,
        *,
        ema_decay = 0.9925,
        image_size = 128,
        train_batch_size = 32,
        train_num_steps = 200000,
        step_start_ema = 500,
        update_ema_every = 10,
        save_and_sample_every = 1000,
        results_folder,
        load_path = None,
        shuffle=True,
        eval_mode=False,
        device,
    ):
        super().__init__()

        ########## Wandb ##########
        if not eval_mode:
            wandb.init(project="DTGM", notes=str(results_folder), name=results_folder)
        self.results_folder = Path(results_folder)
        self.results_folder.mkdir(exist_ok=True)

        self.device = device

        self.model = diffusion_model
        self.discriminator = discriminator
        self.model_size()

        self.ema = EMA(ema_decay)
        self.ema_model = copy.deepcopy(self.model)

        self.update_ema_every = update_ema_every
        self.step_start_ema = step_start_ema

        self.save_and_sample_every = save_and_sample_every

        self.image_size = diffusion_model.image_size
        self.batch_size = train_batch_size
        self.train_num_steps = train_num_steps
        self.nrow = train_batch_size // 2

        self.folder_path = folder
        self.ds = Dataset(folder, image_size)

        self.dl = cycle(data.DataLoader(self.ds, batch_size=train_batch_size, shuffle=shuffle, pin_memory=True, num_workers=2))

        self.opt = AdamW(diffusion_model.parameters(), lr=2e-5, betas=(0.0, 0.9), eps=1e-8)
        self.opt_d = AdamW(self.discriminator.parameters(), lr=5e-5, betas=(0.0, 0.9), eps=1e-8)

        self.BCE_loss = torch.nn.BCEWithLogitsLoss()

        self.step = 0
        self.reset_parameters()
        self.best_quality = 0

        self.loss_dis_false_temp = 0
        self.loss_dis_true_temp = 0

        self.load_path = load_path
        self.n_mix = 0
        self.fid_list = []
        with open(f'{self.results_folder}/fid.txt', 'w') as f:
            for a in self.fid_list:
                f.write(f'{self.step} {a}\n')

    def reset_parameters(self):
        self.ema_model.load_state_dict(self.model.state_dict())

    def step_ema(self):
        if self.step < self.step_start_ema:
            self.reset_parameters()
            return
        self.ema.update_model_average(self.ema_model, self.model)

    def model_size(self):
        param_size = 0
        for param in self.model.parameters():
            param_size += param.nelement() * param.element_size()
        buffer_size = 0
        for buffer in self.model.buffers():
            buffer_size += buffer.nelement() * buffer.element_size()

        size_all_mb = (param_size + buffer_size) / 1024**2
        print('model size: {:.3f}MB'.format(size_all_mb))

    def save_ckpt(self):
        data = {
            'step': self.step,
            'model': self.model.state_dict(),
            'ema': self.ema_model.state_dict(),
            'dis': self.discriminator.state_dict(),
        }
        torch.save(data, str(self.results_folder / f'DTGM_ckpt_{self.step}.pt'))

    def save_model(self):
        data = {
            'ema': self.ema_model.state_dict(),
        }
        torch.save(data, str(self.results_folder / f'DTGM_model_{self.step}.pt'))


    def load_all(self, load_path):
        print("Loading : ", load_path)
        data = torch.load(load_path, map_location=self.device)

        self.step = data['step']
        self.model.load_state_dict(data['model'], strict=False)
        self.ema_model.load_state_dict(data['ema'], strict=False)
        self.discriminator.load_state_dict(data['dis'], strict=False)

    def load_for_eval(self, load_path):
        data = torch.load(load_path, map_location=self.device)
        self.ema_model.load_state_dict(data['ema'], strict=False)

    def train(self):
        if self.load_path is not None:
            self.load_all(self.load_path)

        while self.step < self.train_num_steps:
            start_time = time.time()
            data = next(self.dl)
            data = data.to(self.device)

            loss_domain_sim, x_recon = self.model(data)

            self.opt_d.zero_grad()
            score_true = self.discriminator(data)
            GAN_true = torch.ones_like(score_true)
            loss_dis_true = self.BCE_loss(score_true, GAN_true)
            loss_dis_true.backward()

            score_false = self.discriminator(x_recon.detach())
            GAN_false = torch.zeros_like(score_false)
            loss_dis_false = self.BCE_loss(score_false, GAN_false)
            loss_dis_false.backward()
            self.opt_d.step()

            self.loss_dis_false_temp = loss_dis_false.item()
            self.loss_dis_true_temp = loss_dis_true.item()

            self.opt.zero_grad()
            score_fake = self.discriminator(x_recon)
            GAN_fake = torch.ones_like(score_fake)
            loss_gen = self.BCE_loss(score_fake, GAN_fake) * 1e-2
            (loss_gen + loss_domain_sim).backward()
            self.opt.step()

            if self.step % 10 == 0:
                print(f'{self.step} DTLS: Total loss: {loss_domain_sim.item() + loss_gen.item()} | Domain sim: {loss_domain_sim.item()} '
                      f'| Generate: {loss_gen.item()} '
                      f'| Dis real: {self.loss_dis_true_temp} | Dis false: {self.loss_dis_false_temp}')
                      # f'| Features Matching loss: {loss_FM.item()}')

                wandb.log({"Total loss": loss_domain_sim.item() + loss_gen.item(),
                           "Domain Similarity Loss": loss_domain_sim.item(),
                           "Generation loss": loss_gen.item(),
                           "Discriminator loss (real)": self.loss_dis_true_temp,
                           "Discriminator loss (fake)": self.loss_dis_false_temp,}, step=self.step)
                # "Feature Matching loss": loss_FM.item()  "LPIPS loss": lpips_loss.item()

            if self.step % self.update_ema_every == 0:
                self.step_ema()

            if self.step == 0 or self.step % self.save_and_sample_every == 0:
                lr_real, sr_real = self.ema_model.sample(batch_size=self.batch_size, img=data)
                _, sr_real_ii = self.model.sample(batch_size=self.batch_size, img=data)

                save_img = torch.cat((sr_real_ii, lr_real, data, sr_real), dim=0)
                utils.save_image((save_img + 1) / 2, str(self.results_folder / f'{self.step}_GDTLS.png'), nrow=self.nrow)

                wandb.log({"Checkpoint result": wandb.Image(str(self.results_folder / f'{self.step}_GDTLS.png'))})
            if self.step >= 100000 and self.step % 2500 == 0:
                self.save_model()
                self.validate()
            if self.step != 0 and self.step % 10000 == 0:
                self.save_ckpt()

            self.step += 1
        print('training completed')
        wandb.finish()

    def validate(self):
        folder_name = f"temp_samples_{self.device.split(':')[-1]}"
        create_folder(folder_name)
        for i in range(2000):
            random_vector = self.random_vector(1)
            _, sample_hr = self.ema_model.sample(batch_size=1, img=random_vector, save_folder=self.results_folder)
            utils.save_image((sample_hr + 1) / 2, f"{folder_name}/result_{i}.png", nrow=1)
        fid = calculate_fid_given_paths([folder_name, "/hdda/Datasets/ffhq256_mini"], 200, self.device, dims=2048, num_workers=4)
        self.fid_list.append(f"{self.step} {fid.item()}")
        with open(f'{self.results_folder}/fid.txt', 'w') as f:
            for a in self.fid_list:
                f.write(f'{a}\n')
        wandb.log({"FID score": fid.item()})
        del_folder(folder_name)

    def random_vector(self, batch_size):
        mean = random.uniform(-0.75, 0.75)
        std = random.uniform(0.01, 0.5)
        vector = torch.normal(mean=mean, std=std, size=(batch_size, 1, 2, 2))
        for i in range(2):
            mean = random.uniform(-0.75, 0.75)
            std = random.uniform(0.01, 0.5)
            rgb = torch.normal(mean=mean, std=std, size=(batch_size, 1, 2, 2))
            vector = torch.cat((vector, rgb), dim=1)
        return vector.to(self.device)


    def evaluation(self, num_sample=50000, batch_size=16):
        if self.load_path is not None:
            self.load_for_eval(self.load_path)
        img_count = 1
        while img_count <= num_sample:
            # for i in range(num_sample):
            random_vector = self.random_vector(batch_size)
            _, sample_hr = self.ema_model.sample(batch_size=batch_size, img=random_vector, save_folder=self.results_folder)
            for img in sample_hr:
                if img_count <= num_sample:
                    utils.save_image((img + 1) / 2, str(self.results_folder / f'result_{img_count}.png'), nrow=1)
                    print("saving ", img_count)
                    img_count += 1

        # utils.save_image((blur_img_set + 1) / 2, str(self.results_folder / f'random_vector_{i}.png'), nrow=4)
        # utils.save_image((sample_hr + 1) / 2, str(self.results_folder / f'result_{i}.png'), nrow=1)
        # print("saving ", i)

    def fid(self, created_dataset, realistic_dataset="/hdda/Datasets/ffhq256"):
        fid = calculate_fid_given_paths([created_dataset, realistic_dataset], 500, self.device, dims=2048, num_workers=4)
        print("FID Score 50k: ", fid)
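For completeness, the same DTLS/Trainer pair can be wired for training rather than evaluation; a minimal sketch mirroring eval.py with eval_mode=False (the import path and dataset path below are placeholders, not fixed by this repo):

# Minimal training-mode sketch; adjust the import to wherever UNet/Discriminator live.
from models import UNet, Discriminator   # assumption: models.py is importable from here
from gdtls import DTLS, Trainer

device = "cuda:0"
size_list = [256, 64, 32, 16, 8, 4, 3, 2]

dtls = DTLS(UNet().to(device), image_size=256, stride=2, size_list=size_list,
            timesteps=len(size_list) - 1, device=device).to(device)

trainer = Trainer(dtls, Discriminator().to(device), "/path/to/train/images",
                  image_size=256, train_batch_size=4, train_num_steps=200001,
                  ema_decay=0.995, results_folder="DTGM_run", load_path=None,
                  device=device, eval_mode=False)  # eval_mode=False also enables wandb logging
trainer.train()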
models.py
ADDED
@@ -0,0 +1,273 @@
import math
import torch
from torch import nn
from einops import rearrange
from inspect import isfunction

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        try:
            m.weight.data.normal_(0.0, 0.02)
        except AttributeError:  # module matched 'Conv' but has no weight tensor
            pass
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


def exists(x):
    return x is not None

def default(val, d):
    if exists(val):
        return val
    return d() if isfunction(d) else d


class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, *args, **kwargs):
        return self.fn(x, *args, **kwargs) + x

class SinusoidalPosEmb(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        device = x.device
        half_dim = self.dim // 2
        emb = math.log(10000) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, device=device) * -emb)
        emb = x[:, None] * emb[None, :]
        emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
        return emb


class LayerNorm(nn.Module):
    def __init__(self, dim, eps=1e-5):
        super().__init__()
        self.eps = eps
        self.g = nn.Parameter(torch.ones(1, dim, 1, 1))
        self.b = nn.Parameter(torch.zeros(1, dim, 1, 1))

    def forward(self, x):
        var = torch.var(x, dim=1, unbiased=False, keepdim=True)
        mean = torch.mean(x, dim=1, keepdim=True)
        return (x - mean) / (var + self.eps).sqrt() * self.g + self.b

class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.fn = fn
        self.norm = LayerNorm(dim)
        # self.norm = nn.BatchNorm2d(dim)
        # self.norm = nn.GroupNorm(dim // 32, dim)

    def forward(self, x):
        x = self.norm(x)
        return self.fn(x)

# building block modules


class ConvNextBlock(nn.Module):
    """ https://arxiv.org/abs/2201.03545 """

    def __init__(self, dim, dim_out, *, time_emb_dim=None, mult=2, norm=True):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.GELU(),
            nn.Linear(time_emb_dim, dim * 2)
        ) if exists(time_emb_dim) else None

        self.ds_conv = nn.Conv2d(dim, dim, 7, padding=3, groups=dim)

        self.net = nn.Sequential(
            LayerNorm(dim) if norm else nn.Identity(),
            nn.Conv2d(dim, dim_out * mult, 3, 1, 1),
            nn.GELU(),
            nn.Conv2d(dim_out * mult, dim_out, 3, 1, 1),
        )

        # self.noise_adding = NoiseInjection(dim_out)
        self.res_conv = nn.Conv2d(dim, dim_out, 1) if dim != dim_out else nn.Identity()

    def forward(self, x, time_emb=None):
        h = self.ds_conv(x)

        if exists(self.mlp):
            assert exists(time_emb), 'time emb must be passed in'
            condition = self.mlp(time_emb)
            condition = rearrange(condition, 'b c -> b c 1 1')
            weight, bias = torch.split(condition, x.shape[1], dim=1)
            h = h * (1 + weight) + bias

        h = self.net(h)
        # h = self.noise_adding(h)
        return h + self.res_conv(x)


class ConvNextBlock_dis(nn.Module):
    """ https://arxiv.org/abs/2201.03545 """

    def __init__(self, dim, dim_out, *, time_emb_dim=None, mult=2, norm=True):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.GELU(),
            nn.Linear(time_emb_dim, dim * 2)
        ) if exists(time_emb_dim) else None

        self.ds_conv = nn.Conv2d(dim, dim, 7, padding=3, groups=dim)

        self.net = nn.Sequential(
            nn.BatchNorm2d(dim) if norm else nn.Identity(),
            # LayerNorm(dim) if norm else nn.Identity(),
            nn.Conv2d(dim, dim_out * mult, 3, 1, 1),
            nn.GELU(),
            nn.Conv2d(dim_out * mult, dim_out, 3, 1, 1),
        )

        self.res_conv = nn.Conv2d(dim, dim_out, 1) if dim != dim_out else nn.Identity()

    def forward(self, x):
        h = self.ds_conv(x)
        h = self.net(h)
        return h + self.res_conv(x)



class LinearAttention(nn.Module):
    def __init__(self, dim, heads=4, dim_head=32):
        super().__init__()
        self.scale = dim_head ** -0.5
        self.heads = heads
        hidden_dim = dim_head * heads
        self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
        self.to_out = nn.Conv2d(hidden_dim, dim, 1)

    def forward(self, x):
        b, c, h, w = x.shape
        qkv = self.to_qkv(x).chunk(3, dim=1)
        q, k, v = map(lambda t: rearrange(t, 'b (h c) x y -> b h c (x y)', h=self.heads), qkv)
        q = q * self.scale

        k = k.softmax(dim=-1)
        context = torch.einsum('b h d n, b h e n -> b h d e', k, v)

        out = torch.einsum('b h d e, b h d n -> b h e n', context, q)
        out = rearrange(out, 'b h c (x y) -> b (h c) x y', h=self.heads, x=h, y=w)
        return self.to_out(out)


# model
class UNet(nn.Module):
    def __init__(
        self,
        dim=32,
        dim_mults=(1, 2, 4, 8, 16, 32, 32),
        channels=3,
    ):
        super().__init__()
        self.channels = dim

        dims = [dim, *map(lambda m: dim * m, dim_mults)]
        in_out = list(zip(dims[:-1], dims[1:]))
        self.model_depth = len(dim_mults)

        time_dim = dim
        self.time_mlp = nn.Sequential(
            SinusoidalPosEmb(dim),
            nn.Linear(dim, dim * 2),
            nn.GELU(),
            nn.Linear(dim * 2, dim)
        )

        self.downs = nn.ModuleList([])
        self.ups = nn.ModuleList([])

        num_resolutions = len(in_out)

        self.initial = nn.Conv2d(channels, dim, 7, 1, 3, bias=False)

        for ind, (dim_in, dim_out) in enumerate(in_out):
            self.downs.append(nn.ModuleList([
                ConvNextBlock(dim_in, dim_out, time_emb_dim=time_dim, norm=ind != 0),
                nn.AvgPool2d(2),
                Residual(PreNorm(dim_out, LinearAttention(dim_out))) if ind >= (num_resolutions - 3) else nn.Identity(),
                ConvNextBlock(dim_out, dim_out, time_emb_dim=time_dim),
            ]))

        for ind, (dim_in, dim_out) in enumerate(reversed(in_out)):
            self.ups.append(nn.ModuleList([
                ConvNextBlock(dim_out * 2, dim_in, time_emb_dim=time_dim),
                nn.Upsample(scale_factor=2, mode='nearest'),
                Residual(PreNorm(dim_in, LinearAttention(dim_in))) if ind < 3 else nn.Identity(),
                ConvNextBlock(dim_in, dim_in, time_emb_dim=time_dim),
            ]))

        self.final_conv = nn.Conv2d(dim, 3, 1, bias=False)

    def forward(self, x, time):
        x = self.initial(x)
        t = self.time_mlp(time) if exists(self.time_mlp) else None
        h = []
        for convnext, downsample, attn, convnext2 in self.downs:
            x = convnext(x, t)
            x = downsample(x)
            h.append(x)
            x = attn(x)
            x = convnext2(x, t)

        for convnext, upsample, attn, convnext2 in self.ups:
            x = torch.cat((x, h.pop()), dim=1)
            x = convnext(x, t)
            x = upsample(x)
            x = attn(x)
            x = convnext2(x, t)

        return self.final_conv(x)


class Discriminator(nn.Module):
    def __init__(
        self,
        dim=32,
        dim_mults=(1, 2, 4, 8, 16, 32, 32),
        channels=3,
        with_time_emb=True,
    ):
        super().__init__()
        self.channels = dim

        dims = [dim, *map(lambda m: dim * m, dim_mults)]
        in_out = list(zip(dims[:-1], dims[1:]))
        self.model_depth = len(dim_mults)

        self.downs = nn.ModuleList([])
        num_resolutions = len(in_out)

        self.initial = nn.Conv2d(channels, dim, 7, 1, 3, bias=False)

        for ind, (dim_in, dim_out) in enumerate(in_out):
            is_last = ind >= (num_resolutions - 1)
            self.downs.append(nn.ModuleList([
                ConvNextBlock_dis(dim_in, dim_out, norm=ind != 0),
                nn.AvgPool2d(2),
                ConvNextBlock_dis(dim_out, dim_out),
            ]))
        dim_out = dim_mults[-1] * dim

        self.out = nn.Conv2d(dim_out, 1, 1, bias=False)

    def forward(self, x):
        x = self.initial(x)
        for convnext, downsample, convnext2 in self.downs:
            x = convnext(x)
            x = downsample(x)
            x = convnext2(x)
        return self.out(x).view(x.shape[0], -1)
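With the default seven-stage dim_mults, a 256x256 input is pooled down to 2x2 at the bottleneck and restored on the way back up, and the discriminator emits one logit per 2x2 spatial cell. A quick shape check on random data (CPU, illustrative only; assumes models.py is importable as shown):

# Shape smoke test for the default configurations, using random inputs.
import torch
from models import UNet, Discriminator   # assumption: models.py is on the import path

x = torch.randn(2, 3, 256, 256)           # batch of 2 RGB images at hr_size
t = torch.full((2,), 3, dtype=torch.long) # per-sample timestep index for the time MLP

unet = UNet()
print(unet(x, t).shape)                   # torch.Size([2, 3, 256, 256])

disc = Discriminator()
print(disc(x).shape)                      # torch.Size([2, 4]): one logit per 2x2 cell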