Spaces:

Arnaudding001
/

MakeCartoonVideo

Runtime error

App Files Files Community

Arnaudding001 committed on Nov 7, 2022

Commit

48d26bf

1 Parent(s): 4151959

Create util.py

Browse files

Files changed (1) hide show

util.py +229 -0

util.py ADDED Viewed

	@@ -0,0 +1,229 @@

+import numpy as np
+import matplotlib.pyplot as plt
+from PIL import Image
+import cv2
+import random
+import math
+import argparse
+import torch
+from torch.utils import data
+from torch.nn import functional as F
+from torch import autograd
+from torch.nn import init
+import torchvision.transforms as transforms
+from model.stylegan.op import conv2d_gradfix
+from model.encoder.encoders.psp_encoders import GradualStyleEncoder
+from model.encoder.align_all_parallel import get_landmark
+def visualize(img_arr, dpi):
+    """Display an image tensor in a 10x10 inch matplotlib figure.
+
+    The tensor is detached, moved to the CPU, transposed with axes
+    (1, 2, 0), mapped through ``(x + 1) * 127.5`` and cast to uint8.
+    Assumes ``img_arr`` is channel-first with values in [-1, 1]
+    (TODO confirm against callers).
+
+    Args:
+        img_arr: 3-D image tensor.
+        dpi: resolution passed to ``plt.figure``.
+    """
+    plt.figure(figsize=(10, 10), dpi=dpi)
+    pixels = img_arr.detach().cpu().numpy().transpose(1, 2, 0)
+    pixels = ((pixels + 1.0) * 127.5).astype(np.uint8)
+    plt.imshow(pixels)
+    plt.axis('off')
+    plt.show()
+def save_image(img, filename):
+    """Write an image tensor to ``filename`` with OpenCV.
+
+    The tensor is detached, moved to the CPU, transposed with axes
+    (1, 2, 0), mapped through ``(x + 1) * 127.5``, cast to uint8 and
+    converted from RGB to BGR channel order before ``cv2.imwrite``.
+    Assumes ``img`` is channel-first with values in [-1, 1]
+    (TODO confirm against callers).
+    """
+    rgb = img.detach().cpu().numpy().transpose(1, 2, 0)
+    rgb = ((rgb + 1.0) * 127.5).astype(np.uint8)
+    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
+    cv2.imwrite(filename, bgr)
+def load_image(filename):
+    """Load an image file as a normalized tensor with a leading batch axis.
+
+    The image is converted to three-channel RGB, turned into a tensor with
+    ``ToTensor`` and normalized per channel with mean 0.5 and std 0.5.
+
+    Args:
+        filename: path (or file object) accepted by ``PIL.Image.open``.
+
+    Returns:
+        The transformed image tensor with an extra dimension inserted at
+        position 0.
+    """
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+    ])
+    # Image.open is lazy; the context manager guarantees the underlying
+    # file handle is released once the pixels have been decoded.
+    with Image.open(filename) as img:
+        # Normalize is configured for exactly three channels, so grayscale,
+        # palette or RGBA inputs must be converted to RGB first.
+        tensor = transform(img.convert('RGB'))
+    return tensor.unsqueeze(dim=0)
+def data_sampler(dataset, shuffle, distributed):
+    """Choose a sampler for ``dataset``.
+
+    Returns a ``DistributedSampler`` (forwarding ``shuffle``) when
+    ``distributed`` is truthy; otherwise a ``RandomSampler`` when ``shuffle``
+    is truthy, else a ``SequentialSampler``.
+    """
+    if distributed:
+        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)
+    sampler_cls = data.RandomSampler if shuffle else data.SequentialSampler
+    return sampler_cls(dataset)
+def requires_grad(model, flag=True):
+    """Set ``requires_grad`` to ``flag`` on every parameter of ``model``."""
+    for param in model.parameters():
+        param.requires_grad = flag
+def accumulate(model1, model2, decay=0.999):
+    """Update ``model1`` in place as a moving average towards ``model2``.
+
+    For every named parameter of ``model1`` the stored data becomes
+    ``decay * model1_param + (1 - decay) * model2_param``. ``model2`` must
+    expose a parameter for each of those names; it is not modified.
+    """
+    targets = dict(model1.named_parameters())
+    sources = dict(model2.named_parameters())
+    for name, target in targets.items():
+        target.data.mul_(decay).add_(sources[name].data, alpha=1 - decay)
+def sample_data(loader):
+    """Yield batches from ``loader`` forever, restarting it when exhausted."""
+    while True:
+        yield from loader
+def d_logistic_loss(real_pred, fake_pred):
+    """Return ``mean(softplus(-real_pred)) + mean(softplus(fake_pred))``."""
+    real_term = F.softplus(-real_pred).mean()
+    fake_term = F.softplus(fake_pred).mean()
+    return real_term + fake_term
+def d_r1_loss(real_pred, real_img):
+    """Return the mean squared gradient norm of ``real_pred`` w.r.t. ``real_img``.
+
+    The gradient of ``real_pred.sum()`` with respect to ``real_img`` is taken
+    (with ``create_graph=True``) inside ``conv2d_gradfix.no_weight_gradients()``.
+    Its squares are summed per sample over all non-batch dimensions and the
+    result is averaged over the batch.
+    """
+    with conv2d_gradfix.no_weight_gradients():
+        (gradient,) = autograd.grad(
+            outputs=real_pred.sum(), inputs=real_img, create_graph=True
+        )
+    per_sample = gradient.pow(2).reshape(gradient.shape[0], -1).sum(1)
+    return per_sample.mean()
+def g_nonsaturating_loss(fake_pred):
+    """Return ``mean(softplus(-fake_pred))``."""
+    return F.softplus(-fake_pred).mean()
+def g_path_regularize(fake_img, latents, mean_path_length, decay=0.01):
+    """Compute a path-length penalty for ``fake_img`` with respect to ``latents``.
+
+    Gaussian noise shaped like ``fake_img`` is scaled by
+    ``1 / sqrt(shape[2] * shape[3])``; the gradient of ``(fake_img * noise).sum()``
+    with respect to ``latents`` gives per-sample path lengths
+    ``sqrt(mean over dim 1 of (sum over dim 2 of grad**2))``.
+
+    Args:
+        fake_img: 4-D tensor that depends on ``latents`` through autograd.
+        latents: 3-D tensor that requires grad.
+        mean_path_length: current running mean of the path length.
+        decay: step size used to move the running mean towards the batch mean.
+
+    Returns:
+        ``(path_penalty, detached updated running mean, path_lengths)``.
+    """
+    pixel_count = fake_img.shape[2] * fake_img.shape[3]
+    noise = torch.randn_like(fake_img) / math.sqrt(pixel_count)
+    (grad,) = autograd.grad(
+        outputs=(fake_img * noise).sum(), inputs=latents, create_graph=True
+    )
+    path_lengths = torch.sqrt(grad.pow(2).sum(2).mean(1))
+    batch_mean = path_lengths.mean()
+    path_mean = mean_path_length + decay * (batch_mean - mean_path_length)
+    path_penalty = (path_lengths - path_mean).pow(2).mean()
+    return path_penalty, path_mean.detach(), path_lengths
+def make_noise(batch, latent_dim, n_noise, device):
+    """Sample standard-normal latent noise on ``device``.
+
+    Returns a single ``(batch, latent_dim)`` tensor when ``n_noise == 1``;
+    otherwise a tuple of ``n_noise`` such tensors, obtained by unbinding one
+    ``(n_noise, batch, latent_dim)`` sample along dimension 0.
+    """
+    if n_noise == 1:
+        return torch.randn(batch, latent_dim, device=device)
+    stacked = torch.randn(n_noise, batch, latent_dim, device=device)
+    return stacked.unbind(0)
+def mixing_noise(batch, latent_dim, prob, device):
+    """Return one or two latent noise tensors for style mixing.
+
+    With probability ``prob`` (only when ``prob > 0``) the result is the pair
+    returned by ``make_noise(..., 2, ...)``; otherwise it is a one-element
+    list holding a single noise tensor.
+    """
+    # random.random() is only consulted when prob > 0 (short-circuit)
+    use_pair = prob > 0 and random.random() < prob
+    if not use_pair:
+        return [make_noise(batch, latent_dim, 1, device)]
+    return make_noise(batch, latent_dim, 2, device)
+def set_grad_none(model, targets):
+    """Reset ``.grad`` to ``None`` for parameters of ``model`` named in ``targets``."""
+    for name, param in model.named_parameters():
+        if name not in targets:
+            continue
+        param.grad = None
+def weights_init(m):
+    """Initialise a module according to its class name.
+
+    * Class names containing ``BatchNorm2d``: weight drawn from N(1.0, 0.02)
+      and bias set to 0, each only when present and not ``None``.
+    * Class names containing ``Conv`` or ``Linear`` that have a ``weight``
+      attribute: Kaiming-normal weight (``a=0``, ``mode='fan_in'``) and bias
+      set to 0 when present and not ``None``.
+    * Any other module is left untouched.
+    """
+    classname = m.__class__.__name__
+    has_bias = getattr(m, 'bias', None) is not None
+    if 'BatchNorm2d' in classname:
+        if getattr(m, 'weight', None) is not None:
+            init.normal_(m.weight.data, 1.0, 0.02)
+        if has_bias:
+            init.constant_(m.bias.data, 0.0)
+        return
+    is_conv_or_linear = 'Conv' in classname or 'Linear' in classname
+    if hasattr(m, 'weight') and is_conv_or_linear:
+        init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
+        if has_bias:
+            init.constant_(m.bias.data, 0.0)
+def load_psp_standalone(checkpoint_path, device='cuda'):
+    """Build a ``GradualStyleEncoder`` from a checkpoint and return it in eval mode.
+
+    The checkpoint is expected to contain ``'opts'`` (a dict of options),
+    ``'state_dict'`` and ``'latent_avg'``. Only state-dict entries whose key
+    starts with ``'encoder.'`` are loaded, with that prefix removed. A forward
+    hook is registered so that every forward pass returns the encoder output
+    plus ``latent_avg`` repeated along the batch dimension.
+
+    Args:
+        checkpoint_path: path passed to ``torch.load``.
+        device: device the encoder and ``latent_avg`` are moved to.
+
+    Returns:
+        The encoder, in eval mode, on ``device``, with the hook installed.
+    """
+    # NOTE(review): torch.load unpickles the file; only load checkpoints
+    # from a trusted source.
+    ckpt = torch.load(checkpoint_path, map_location='cpu')
+    opts = ckpt['opts']
+    if 'output_size' not in opts:
+        opts['output_size'] = 1024
+    # math.log2 is more accurate than math.log(x, 2), so truncating with
+    # int() cannot fall one short because of floating-point rounding.
+    opts['n_styles'] = int(math.log2(opts['output_size'])) * 2 - 2
+    opts = argparse.Namespace(**opts)
+    psp = GradualStyleEncoder(50, 'ir_se', opts)
+    # Strip only the leading 'encoder.' prefix; str.replace would also remove
+    # any later occurrence of the substring inside a key.
+    prefix = 'encoder.'
+    psp_dict = {
+        k[len(prefix):]: v
+        for k, v in ckpt['state_dict'].items()
+        if k.startswith(prefix)
+    }
+    psp.load_state_dict(psp_dict)
+    psp.eval()
+    psp = psp.to(device)
+    latent_avg = ckpt['latent_avg'].to(device)
+
+    def add_latent_avg(model, inputs, outputs):
+        # Returning a value from a forward hook replaces the module output.
+        return outputs + latent_avg.repeat(outputs.shape[0], 1, 1)
+
+    psp.register_forward_hook(add_latent_avg)
+    return psp
+def get_video_crop_parameter(filepath, predictor, padding=(200, 200, 200, 200)):
+    """Compute rescale and crop parameters around the face found in an image.
+
+    Landmarks come from ``get_landmark(img, predictor)``. The scale is chosen
+    so that the horizontal distance between the mean positions of landmarks
+    42-47 (right eye) and 36-41 (left eye) becomes 64 pixels. The crop window
+    is centred on the midpoint of the two eyes in the rescaled image,
+    extended by ``padding``, clipped to the rescaled image and floored to
+    multiples of 8.
+
+    Args:
+        filepath: path of an image file (``str``), or an already loaded image
+            array with ``shape[0]`` = height and ``shape[1]`` = width.
+        predictor: landmark predictor forwarded to ``get_landmark``.
+        padding: ``(left, right, top, bottom)`` margins around the eye centre,
+            in pixels of the rescaled image.
+
+    Returns:
+        ``None`` when no landmarks are found, otherwise the tuple
+        ``(h, w, top, bottom, left, right, scale)`` where ``h``/``w`` are the
+        rescaled image size.
+    """
+    if isinstance(filepath, str):
+        # The original called dlib.load_rgb_image, but dlib is not imported in
+        # this module; PIL + numpy (both imported) yield the same RGB array.
+        with Image.open(filepath) as pil_img:
+            img = np.array(pil_img.convert('RGB'))
+    else:
+        img = filepath
+    lm = get_landmark(img, predictor)
+    if lm is None:
+        return None
+    # Only the eye landmarks of the 68-point layout are needed here.
+    lm_eye_left = lm[36:42]
+    lm_eye_right = lm[42:48]
+    scale = 64. / (np.mean(lm_eye_right[:, 0]) - np.mean(lm_eye_left[:, 0]))
+    center = ((np.mean(lm_eye_right, axis=0) + np.mean(lm_eye_left, axis=0)) / 2) * scale
+    h, w = round(img.shape[0] * scale), round(img.shape[1] * scale)
+    # "// 8 * 8" floors each bound to a multiple of 8
+    left = max(round(center[0] - padding[0]), 0) // 8 * 8
+    right = min(round(center[0] + padding[1]), w) // 8 * 8
+    top = max(round(center[1] - padding[2]), 0) // 8 * 8
+    bottom = min(round(center[1] + padding[3]), h) // 8 * 8
+    return h, w, top, bottom, left, right, scale
+def tensor2cv2(img):
+    """Convert an image tensor to a uint8 BGR array for OpenCV.
+
+    The tensor is detached, moved to the CPU, transposed with axes
+    (1, 2, 0), mapped through ``(x + 1) * 127.5``, cast to uint8 and
+    converted from RGB to BGR channel order.
+    Assumes ``img`` is channel-first with values in [-1, 1]
+    (TODO confirm against callers).
+    """
+    # detach() matches save_image/visualize and avoids the RuntimeError that
+    # .numpy() raises on a tensor that requires grad.
+    rgb = img.detach().cpu().numpy().transpose(1, 2, 0)
+    rgb = ((rgb + 1.0) * 127.5).astype(np.uint8)
+    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
+# Collect the buffers and parameters of a StyleGAN generator, grouped by layer.
+def gather_params(G):
+    """Group the named buffers and parameters of ``G`` by layer index.
+
+    Buckets are the integers 0..17 plus ``"others"``. A name is assigned by
+    its prefix, tested in this order:
+
+    * ``convs.<i>...``   -> layer ``i + 1``
+    * ``to_rgbs.<i>...`` -> layer ``2 * i + 3``
+    * ``conv1...``       -> layer 0
+    * ``to_rgb1...``     -> layer 1
+    * anything else      -> ``"others"``
+
+    Returns:
+        dict mapping each bucket to a ``{name: tensor}`` dict.
+    """
+    params = {layer: {} for layer in range(18)}
+    params["others"] = {}
+    named = list(G.named_buffers()) + list(G.named_parameters())
+    for n, p in sorted(named):
+        if n.startswith("convs"):
+            bucket = int(n.split(".")[1]) + 1
+        elif n.startswith("to_rgbs"):
+            bucket = int(n.split(".")[1]) * 2 + 3
+        elif n.startswith("conv1"):
+            bucket = 0
+        elif n.startswith("to_rgb1"):
+            bucket = 1
+        else:
+            bucket = "others"
+        params[bucket][n] = p
+    return params
+# Blend the FFHQ StyleGAN model with the fine-tuned model for toonify.
+# See "Resolution Dependent GAN Interpolation for Controllable Image
+# Synthesis Between Domains".
+def blend_models(G_low, G_high, weight=[1]*7+[0]*11):
+    """Interpolate two generators layer by layer and return a state dict.
+
+    Both models are grouped with ``gather_params``. For each of the 18 layer
+    buckets, every tensor becomes
+    ``high * (1 - weight[layer]) + low * weight[layer]``. Entries in the
+    ``"others"`` bucket are taken from ``G_high`` unchanged.
+
+    Args:
+        G_low: generator whose tensors are weighted by ``weight[layer]``.
+        G_high: generator whose tensors are weighted by ``1 - weight[layer]``.
+        weight: per-layer blend weights, indexed by layer bucket.
+
+    Returns:
+        dict mapping parameter/buffer names to the blended tensors.
+    """
+    low_by_layer = gather_params(G_low)
+    high_by_layer = gather_params(G_high)
+    state_dict = {}
+    for layer in range(18):
+        low_group = low_by_layer[layer]
+        for name, high_value in high_by_layer[layer].items():
+            state_dict[name] = (
+                high_value * (1 - weight[layer]) + low_group[name] * weight[layer]
+            )
+    state_dict.update(high_by_layer["others"])
+    return state_dict