| | import cv2 |
| | import torch |
| | import torchvision |
| | import numpy as np |
| | import torch.nn as nn |
| | from PIL import Image |
| | from tqdm import tqdm |
| | import torch.nn.functional as F |
| | import torchvision.transforms as transforms |
| |
|
| | from . model import BiSeNet |
| |
|
class SoftErosion(nn.Module):
    """Shrink a mask and feather its border with a cone-shaped blur kernel.

    The kernel weights fall off linearly with distance from the centre and
    are normalised to sum to one. It is stored as the ``weight`` buffer so it
    follows the module across ``.to(device)`` calls.

    Args:
        kernel_size: Side length of the square blur kernel.
        threshold: Blurred values at or above this are snapped to 1.0.
        iterations: Number of blur passes; all but the last are combined with
            the running mask through an element-wise minimum.
    """

    def __init__(self, kernel_size=15, threshold=0.6, iterations=1):
        super(SoftErosion, self).__init__()
        r = kernel_size // 2
        self.padding = r
        self.iterations = iterations
        self.threshold = threshold

        # Distance of every kernel cell from the centre. Broadcasting a single
        # offset vector yields the same grid as torch.meshgrid, without the
        # "indexing" warning newer PyTorch releases emit for that call.
        offsets = torch.arange(0., kernel_size) - r
        dist = torch.sqrt(offsets[None, :] ** 2 + offsets[:, None] ** 2)
        kernel = dist.max() - dist
        kernel /= kernel.sum()
        kernel = kernel.view(1, 1, *kernel.shape)
        self.register_buffer('weight', kernel)

    def forward(self, x):
        """Erode and soften each mask of the batch independently.

        Args:
            x: Batch of masks. Each ``x[i:i+1]`` slice gets a channel axis
                inserted at position 1 before the 2-D convolution, which
                implies a ``(batch, height, width)`` layout.

        Returns:
            ``numpy.ndarray`` stacking one ``(1, height, width)`` result per
            sample, i.e. ``(batch, 1, height, width)`` for a non-empty batch.
        """
        batch_size = x.size(0)
        output = []

        for i in tqdm(range(batch_size), desc="Soft-Erosion", leave=False):
            input_tensor = x[i:i + 1].float().unsqueeze(1)

            # Every pass but the last keeps the smaller of the current mask
            # and its blurred version, so the mask can only shrink.
            for _ in range(self.iterations - 1):
                blurred = F.conv2d(input_tensor, weight=self.weight,
                                   groups=input_tensor.shape[1],
                                   padding=self.padding)
                input_tensor = torch.min(input_tensor, blurred)
            input_tensor = F.conv2d(input_tensor, weight=self.weight,
                                    groups=input_tensor.shape[1],
                                    padding=self.padding)

            mask = input_tensor >= self.threshold
            input_tensor[mask] = 1.0

            # Rescale the remaining soft pixels so their peak becomes 1.0.
            # Skip when nothing is left below the threshold (max() of an empty
            # tensor raises) or when the peak is 0 (a blank mask would turn
            # into NaN through 0 / 0).
            soft_region = ~mask
            soft_values = input_tensor[soft_region]
            if soft_values.numel() > 0:
                peak = soft_values.max()
                if peak != 0:
                    input_tensor[soft_region] = soft_values / peak

            input_tensor = input_tensor.squeeze(1)
            output.append(input_tensor.detach().cpu().numpy())

        return np.array(output)
| |
|
# Preprocessing pipeline applied to each PIL image before it is fed to the
# parsing network: resize to 512x512, convert to a tensor, then normalise each
# channel (the mean/std values match the widely used ImageNet statistics).
transform = transforms.Compose([
    transforms.Resize(size=(512, 512)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])
| |
|
| |
|
| |
|
def init_parsing_model(model_path, device="cpu"):
    """Build a BiSeNet, load its weights and return it in eval mode.

    Args:
        model_path: Path to the checkpoint holding the network's state dict.
        device: Device the network and the loaded weights are placed on.

    Returns:
        The ``BiSeNet`` instance on ``device`` with ``eval()`` applied.
    """
    # 19 is passed straight to the BiSeNet constructor; presumably the number
    # of parsing classes -- confirm against the model definition.
    net = BiSeNet(19)
    net.to(device)
    # map_location remaps the stored tensors onto the requested device, so a
    # checkpoint saved from a GPU still loads on a CPU-only machine.
    # NOTE(review): torch.load unpickles the file; only load trusted
    # checkpoints.
    state_dict = torch.load(model_path, map_location=device)
    net.load_state_dict(state_dict)
    net.eval()
    return net
| |
|
def transform_images(imgs):
    """Preprocess a sequence of BGR images into one stacked tensor.

    Each image is converted from BGR to RGB, wrapped as a PIL image and run
    through the module-level ``transform`` pipeline; the per-image tensors are
    then stacked along a new leading dimension.
    """
    preprocessed = []
    for bgr_image in imgs:
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb_image)
        preprocessed.append(transform(pil_image))
    return torch.stack(preprocessed, dim=0)
| |
|
def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="cpu", batch_size=8, softness=20):
    """Yield one array of parsing masks per batch of images.

    This is a generator: nothing runs until it is iterated.

    Args:
        net: Parsing network; the first element of its output is used as the
            per-class score map (argmax is taken over dimension 1).
        imgs: Sliceable sequence of BGR images, preprocessed through
            ``transform_images``.
        classes: Class ids that count as foreground. The default list is only
            read, never mutated.
        device: Device used for inference and for the mask softening.
        batch_size: Number of images processed per step.
        softness: Iteration count handed to ``SoftErosion``; a value of 0 or
            less disables softening and yields the hard 0/1 masks.

    Yields:
        ``numpy.ndarray`` with one mask per image of the batch: hard float32
        0/1 masks when softening is disabled, otherwise the ``SoftErosion``
        output with its singleton channel axis removed.
    """
    smooth_mask = None
    if softness > 0:
        smooth_mask = SoftErosion(kernel_size=17, threshold=0.9, iterations=softness).to(device)

    # No explicit total: tqdm takes it from len(range(...)), which also counts
    # the final partial batch that floor division used to drop.
    for i in tqdm(range(0, len(imgs), batch_size), desc="Face-parsing"):
        batch_imgs = imgs[i:i + batch_size]

        tensor_images = transform_images(batch_imgs).to(device)
        with torch.no_grad():
            out = net(tensor_images)[0]

        parsing = out.argmax(dim=1).cpu().numpy()
        batch_masks = np.isin(parsing, classes).astype('float32')

        if smooth_mask is not None:
            mask_tensor = torch.from_numpy(batch_masks).to(device)
            # SoftErosion returns (batch, 1, H, W); drop the channel axis.
            batch_masks = smooth_mask(mask_tensor)[:, 0]

        yield batch_masks
| |
|
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| |
|