| | import torch |
| | import torch.nn as nn |
| | import torch.nn.functional as F |
| | import torchvision.transforms as transforms |
| | import cv2 |
| | import numpy as np |
| |
|
| | from .model import BiSeNet |
| |
|
# Region label -> integer class id. The ids are the values that get_mask()
# compares against the parsing map; they follow the order of the labels below.
mask_regions = {
    label: class_id
    for class_id, label in enumerate((
        "Background",
        "Skin",
        "L-Eyebrow",
        "R-Eyebrow",
        "L-Eye",
        "R-Eye",
        "Eye-G",
        "L-Ear",
        "R-Ear",
        "Ear-R",
        "Nose",
        "Mouth",
        "U-Lip",
        "L-Lip",
        "Neck",
        "Neck-L",
        "Cloth",
        "Hair",
        "Hat",
    ))
}
| |
|
| | |
| | |
class SoftErosion(nn.Module):
    """Smooth a mask with a distance-weighted kernel and threshold the result.

    The kernel weight decreases linearly with the distance from the kernel
    centre and is normalised to sum to 1. ``forward`` convolves the input with
    it, sets every value at or above ``threshold`` to 1.0 and rescales the
    remaining values by their maximum.

    Args:
        kernel_size: Side length of the square kernel; padding is
            ``kernel_size // 2``.
        threshold: Smoothed values at or above this become exactly 1.0.
        iterations: Number of smoothing passes. Every pass except the last
            keeps the element-wise minimum of the input and its smoothed
            version; the last pass keeps the smoothed version only.
    """

    def __init__(self, kernel_size=15, threshold=0.6, iterations=1):
        super().__init__()
        r = kernel_size // 2
        self.padding = r
        self.iterations = iterations
        self.threshold = threshold

        # Distance of every kernel cell from the centre, built by broadcasting.
        # This yields the same values as a meshgrid but avoids torch.meshgrid,
        # whose default indexing emits a warning on recent PyTorch versions.
        offsets = torch.arange(0., kernel_size) - r
        dist = torch.sqrt(offsets[None, :] ** 2 + offsets[:, None] ** 2)
        kernel = dist.max() - dist
        total = kernel.sum()
        if total > 0:
            kernel = kernel / total
        else:
            # kernel_size == 1 gives a single zero weight; dividing by its sum
            # would produce NaN, so fall back to an identity kernel.
            kernel = torch.ones_like(kernel)
        kernel = kernel.view(1, 1, *kernel.shape)
        self.register_buffer('weight', kernel)

    def forward(self, x):
        """Smooth ``x`` and return ``(soft_mask, hard_mask)``.

        Args:
            x: Tensor indexed as (batch, channels, height, width); it is cast
                to float before filtering.

        Returns:
            A tuple ``(x, mask)`` where ``mask`` is the boolean tensor
            ``smoothed >= threshold`` and ``x`` is the smoothed tensor with
            masked entries set to 1.0 and the remaining entries divided by
            their maximum.
        """
        x = x.float()
        channels = x.shape[1]
        # A grouped (per-channel) convolution needs one filter per channel; the
        # stored single filter is repeated so multi-channel input also works.
        if channels == 1:
            weight = self.weight
        else:
            weight = self.weight.repeat(channels, 1, 1, 1)

        for _ in range(self.iterations - 1):
            smoothed = F.conv2d(x, weight=weight, groups=channels, padding=self.padding)
            x = torch.min(x, smoothed)
        x = F.conv2d(x, weight=weight, groups=channels, padding=self.padding)

        mask = x >= self.threshold
        x[mask] = 1.0

        # Rescale the sub-threshold values by their peak. Skip this when there
        # are no such values (max() of an empty tensor raises) or when the peak
        # is 0 (0 / 0 would fill the output with NaN).
        soft = x[~mask]
        if soft.numel() > 0:
            peak = soft.max()
            if peak != 0:
                x[~mask] = soft / peak

        return x, mask
| |
|
# Device used by the helpers in this module when moving tensors.
# Overwritten by init_parser().
device = "cpu"


def init_parser(pth_path, mode="cpu"):
    """Build a BiSeNet parsing network, load its weights and put it in eval mode.

    As a side effect the module-level ``device`` is set to ``mode``.

    Args:
        pth_path: Path of the checkpoint file handed to ``torch.load``.
        mode: Device the network should live on, e.g. ``"cpu"`` or ``"cuda"``.
            Any device string accepted by ``Module.to`` works.

    Returns:
        The ``BiSeNet`` instance (19 classes) with the checkpoint loaded,
        moved to ``mode`` and switched to eval mode.
    """
    global device
    device = mode
    n_classes = 19
    net = BiSeNet(n_classes=n_classes)

    # Always deserialize onto the CPU and move the whole network afterwards.
    # This keeps the network and the module-level ``device`` in agreement for
    # every device string, not only for the literal "cuda".
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(pth_path, map_location=torch.device('cpu'))
    net.load_state_dict(state_dict)
    net.to(device)
    net.eval()
    return net
| |
|
| |
|
def image_to_parsing(img, net):
    """Run the parsing network on an image and return its per-pixel class map.

    The image is resized to 512x512, its channel axis is reversed (presumably
    BGR -> RGB; confirm against callers), converted to a tensor and normalised
    before being fed to ``net``.

    Args:
        img: Image array with the channel axis last, as accepted by
            ``cv2.resize``.
        net: Parsing network; only the first element of its output is used.

    Returns:
        A numpy array of class indices obtained by taking the argmax over the
        first axis of the network output after the batch axis is dropped.
    """
    # Feed the input on the device the network actually lives on; the
    # module-level ``device`` is only a fallback, since it can be changed by a
    # later init_parser() call or be out of sync with a manually moved network.
    try:
        net_device = next(net.parameters()).device
    except (AttributeError, StopIteration):
        net_device = device

    resized = cv2.resize(img, (512, 512))
    flipped = resized[:, :, ::-1]
    # These mean/std values match the commonly used ImageNet statistics.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    # .copy() turns the reversed view into a contiguous array before conversion.
    batch = torch.unsqueeze(transform(flipped.copy()), 0)

    with torch.no_grad():
        batch = batch.to(net_device)
        out = net(batch)[0]
        parsing = out.squeeze(0).cpu().numpy().argmax(0)
    return parsing
| |
|
| |
|
def get_mask(parsing, classes):
    """Return a boolean mask of the entries of ``parsing`` found in ``classes``.

    Args:
        parsing: Array of class ids.
        classes: Iterable of class ids to select. An empty iterable yields an
            all-False mask instead of raising.

    Returns:
        Boolean numpy array with the same shape as ``parsing``; True where the
        class id is one of ``classes``.
    """
    # list() lets any iterable (tuple, generator, ...) be used as the test set.
    return np.isin(parsing, list(classes))
| |
|
| |
|
def swap_regions(source, target, net, smooth_mask, includes=(1, 2, 3, 4, 5, 10, 11, 12, 13), blur=10):
    """Blend the selected parsed regions of ``source`` over ``target``.

    ``source`` is parsed with ``net``; the pixels whose class id is in
    ``includes`` form the blend mask, which is optionally softened by
    ``smooth_mask`` and by a Gaussian blur. Blending happens at 512x512 and the
    result is resized back to the height and width of ``source``.

    Args:
        source: Image whose selected regions are kept.
        target: Image used everywhere else.
        net: Parsing network passed to ``image_to_parsing``.
        smooth_mask: Callable returning ``(soft_mask, hard_mask)`` for a
            (1, 1, H, W) tensor, or None to skip this step.
        includes: Class ids to take from ``source``.
        blur: Gaussian sigma applied to the mask; values <= 0 disable the blur.

    Returns:
        A uint8 image with the height and width of ``source``. When
        ``includes`` is empty, ``source`` itself is returned unchanged.
    """
    # Nothing selected: return the image alone, like every other path does,
    # and do so before spending a forward pass of the network.
    if len(includes) == 0:
        return source

    parsing = image_to_parsing(source, net)
    include_mask = get_mask(parsing, includes)
    mask = np.repeat(include_mask[:, :, np.newaxis], 3, axis=2).astype("float32")

    if smooth_mask is not None:
        mask_tensor = torch.from_numpy(mask.copy().transpose((2, 0, 1))).float().to(device)
        # NOTE(review): the three channels are identical here, so this sum is
        # twice the binary mask. Kept as-is because changing it would alter the
        # softened mask that existing results are based on.
        face_mask_tensor = mask_tensor[0] + mask_tensor[1]
        soft_face_mask_tensor, _ = smooth_mask(face_mask_tensor.unsqueeze_(0).unsqueeze_(0))
        soft_face_mask_tensor.squeeze_()
        mask = np.repeat(soft_face_mask_tensor.cpu().numpy()[:, :, np.newaxis], 3, axis=2)

    if blur > 0:
        # A (0, 0) kernel size lets OpenCV derive the kernel from the sigma.
        mask = cv2.GaussianBlur(mask, (0, 0), blur)

    resized_source = cv2.resize((source).astype("float32"), (512, 512))
    resized_target = cv2.resize((target).astype("float32"), (512, 512))
    result = mask * resized_source + (1 - mask) * resized_target
    # Clip before the cast so out-of-range values cannot wrap around in uint8.
    result = np.clip(result, 0, 255).astype("uint8")
    result = cv2.resize(result, (source.shape[1], source.shape[0]))

    return result
| |
|
def mask_regions_to_list(values):
    """Translate region label names into their integer class ids.

    Args:
        values: Iterable of label names (keys of ``mask_regions``).

    Returns:
        List of class ids in the order of ``values``; names that are not keys
        of ``mask_regions`` are skipped.
    """
    return [mask_regions[name] for name in values if name in mask_regions]
| |
|