|
|
from PIL import Image |
|
|
import numpy as np |
|
|
import cv2 |
|
|
from typing import Union |
|
|
from scipy.spatial import ConvexHull |
|
|
from skimage.draw import polygon |
|
|
|
|
|
# Part name -> list of part indices, looked up against `densepose_mask`.
# The plural entries at the end are left+right unions of the single-side parts.
DENSE_INDEX_MAP = {
    "background": [0],
    "torso": [1, 2],
    "right hand": [3],
    "left hand": [4],
    "right foot": [5],
    "left foot": [6],
    "right thigh": [7, 9],
    "left thigh": [8, 10],
    "right leg": [11, 13],
    "left leg": [12, 14],
    "left big arm": [15, 17],
    "right big arm": [16, 18],
    "left forearm": [19, 21],
    "right forearm": [20, 22],
    "face": [23, 24],
    # Aggregated groups.
    "thighs": [7, 8, 9, 10],
    "legs": [11, 12, 13, 14],
    "hands": [3, 4],
    "feet": [5, 6],
    "big arms": [15, 16, 17, 18],
    "forearms": [19, 20, 21, 22],
}

# Label name -> class index, looked up against `schp_atr_mask`.
# Indices are consecutive, so the position in the tuple is the class index.
ATR_MAPPING = {
    label: index
    for index, label in enumerate(
        (
            "Background",
            "Hat",
            "Hair",
            "Sunglasses",
            "Upper-clothes",
            "Skirt",
            "Pants",
            "Dress",
            "Belt",
            "Left-shoe",
            "Right-shoe",
            "Face",
            "Left-leg",
            "Right-leg",
            "Left-arm",
            "Right-arm",
            "Bag",
            "Scarf",
        )
    )
}

# Label name -> class index, looked up against `schp_lip_mask`.
# Indices are consecutive, so the position in the tuple is the class index.
LIP_MAPPING = {
    label: index
    for index, label in enumerate(
        (
            "Background",
            "Hat",
            "Hair",
            "Glove",
            "Sunglasses",
            "Upper-clothes",
            "Dress",
            "Coat",
            "Socks",
            "Pants",
            "Jumpsuits",
            "Scarf",
            "Skirt",
            "Face",
            "Left-arm",
            "Right-arm",
            "Left-leg",
            "Right-leg",
            "Left-shoe",
            "Right-shoe",
        )
    )
}
|
|
|
|
|
# All tables below are keyed by the garment region passed as `part`
# ("upper", "lower", "overall", "inner", "outer").

# Body-part labels kept out of the mask for each region.
PROTECT_BODY_PARTS = dict(
    upper=["Left-leg", "Right-leg"],
    lower=["Right-arm", "Left-arm", "Face"],
    overall=[],
    inner=["Left-leg", "Right-leg"],
    outer=["Left-leg", "Right-leg"],
)

# Garment labels kept out of the mask for each region, listed separately for
# the ATR and LIP label sets. Names a label set does not define are ignored
# by `part_mask_of`.
PROTECT_CLOTH_PARTS = dict(
    upper=dict(ATR=["Skirt", "Pants"], LIP=["Skirt", "Pants"]),
    lower=dict(
        ATR=["Upper-clothes", "Left-shoe", "Right-shoe"],
        LIP=["Upper-clothes", "Coat", "Left-shoe", "Right-shoe"],
    ),
    overall=dict(
        ATR=["Left-shoe", "Right-shoe"],
        LIP=["Left-shoe", "Right-shoe"],
    ),
    inner=dict(
        ATR=["Dress", "Coat", "Skirt", "Pants"],
        LIP=["Dress", "Coat", "Skirt", "Pants", "Jumpsuits"],
    ),
    outer=dict(
        ATR=["Dress", "Pants", "Skirt"],
        LIP=["Upper-clothes", "Dress", "Pants", "Skirt", "Jumpsuits"],
    ),
)

# Accessory labels kept out of the mask regardless of region.
PUBLIC_ACCESSORY_PARTS = ["Hat", "Glove", "Sunglasses", "Scarf", "Bag", "Socks"]

# Garment labels associated with each region (presumably the clothing that is
# being replaced -- confirm against the callers).
MASK_CLOTH_PARTS = dict(
    upper=["Upper-clothes", "Coat", "Dress", "Jumpsuits"],
    lower=["Pants", "Skirt", "Dress", "Jumpsuits"],
    overall=["Upper-clothes", "Dress", "Pants", "Skirt", "Coat", "Jumpsuits"],
    inner=["Upper-clothes"],
    outer=["Coat"],
)

# DENSE_INDEX_MAP group names that are forced into the mask for each region.
MASK_DENSE_PARTS = dict(
    upper=["torso", "big arms", "forearms"],
    lower=["thighs", "legs"],
    overall=["torso", "thighs", "legs", "big arms", "forearms"],
    inner=["torso"],
    outer=["torso", "big arms", "forearms"],
)
|
|
|
|
|
|
|
|
def random_convex_mask(height, width, min_area_ratio=0.25, jitter=10, extra_points=20):
    """
    Generate a convex-polygon mask at a random position in the image.

    A rectangle covering roughly ``min_area_ratio`` of the image (random
    aspect ratio, random position) is used as a seed. Extra random points are
    then scattered inside that rectangle grown by ``jitter`` pixels (clipped
    to the image), and the convex hull of all points is rasterised.

    Args:
        height (int): Image height in pixels.
        width (int): Image width in pixels.
        min_area_ratio (float): Target area of the seed rectangle as a
            fraction of ``height * width``.
        jitter (int): How far (in pixels) the scattered points may fall
            outside the seed rectangle.
        extra_points (int): Number of scattered points.

    Returns:
        np.ndarray: ``uint8`` array of shape ``(height, width)``;
        0 = background, 1 = foreground.
    """
    generator = np.random.default_rng()

    # Seed rectangle: fixed target area, width/height ratio drawn from [0.5, 2.0).
    target_area = (height * width) * min_area_ratio
    ratio = generator.uniform(0.5, 2.0)
    box_h = int(np.sqrt(target_area / ratio))
    box_w = int(box_h * ratio)

    # Random top-left corner. max(1, ...) keeps the sampling range non-empty
    # even when the rectangle is as large as (or larger than) the image.
    left = generator.integers(0, max(1, width - box_w - 1))
    top = generator.integers(0, max(1, height - box_h - 1))
    right, bottom = left + box_w, top + box_h

    # Candidate points are (x, y): the four corners plus the scattered points.
    corners = np.array([[left, top], [right, top], [right, bottom], [left, bottom]])
    scattered = generator.integers(
        low=[max(0, left - jitter), max(0, top - jitter)],
        high=[min(width, right + jitter), min(height, bottom + jitter)],
        size=(extra_points, 2),
    )
    candidates = np.vstack([corners, scattered])

    outline = candidates[ConvexHull(candidates).vertices]

    # polygon() takes row (y) coordinates first, then column (x) coordinates,
    # and clips to `shape`.
    canvas = np.zeros((height, width), dtype=np.uint8)
    rows, cols = polygon(outline[:, 1], outline[:, 0], shape=canvas.shape)
    canvas[rows, cols] = 1
    return canvas
|
|
|
|
|
|
|
|
def part_mask_of(part: Union[str, list], parse: np.ndarray, mapping: dict):
    """Build a binary mask of the pixels in ``parse`` that belong to ``part``.

    Args:
        part: A single part name or a list of part names.
        parse: Label map whose values are class indices.
        mapping: Part name -> class index, or part name -> list of class
            indices. Names missing from ``mapping`` are silently skipped.

    Returns:
        Array with the shape and dtype of ``parse``: 1 where the pixel's label
        belongs to any requested part, 0 elsewhere. The result is strictly
        0/1 even when the requested parts overlap or repeat, so it is safe to
        combine with ``&``, ``|`` and ``~``.
    """
    parse = np.asarray(parse)
    names = [part] if isinstance(part, str) else part

    label_ids = []
    for name in names:
        if name not in mapping:
            continue
        ids = mapping[name]
        if isinstance(ids, list):
            label_ids.extend(ids)
        else:
            label_ids.append(ids)

    # Membership test instead of summing per-label comparisons: summing would
    # count a pixel twice when two requested parts share a label.
    return np.isin(parse, label_ids).astype(parse.dtype)
|
|
|
|
|
|
|
|
def hull_mask(mask_area: np.ndarray):
    """Fill the convex hull of every external contour of a mask.

    Args:
        mask_area: Mask image. Pixels with value > 127 count as foreground.
            If the array has more than two dimensions, only channel 0 is used.

    Returns:
        Array with the shape and dtype of the (2-D) input, set to 255 inside
        the convex hull of each outer contour and 0 elsewhere.
    """
    if mask_area.ndim > 2:
        mask_area = mask_area[:, :, 0]

    _, binary = cv2.threshold(mask_area, 127, 255, cv2.THRESH_BINARY)
    # findContours returns (contours, hierarchy) on OpenCV 4 and
    # (image, contours, hierarchy) on OpenCV 3; [-2] is the contours in both.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    filled = np.zeros_like(mask_area)
    for contour in contours:
        # Hulls are drawn one at a time: each call only adds 255-valued
        # pixels, so overlapping hulls are effectively OR-ed together.
        filled = cv2.fillPoly(filled, [cv2.convexHull(contour)], 255)
    return filled
|
|
|
|
|
|
|
|
def create_square_mask(mask_area, expand_ratio=0.1):
    """
    Fill the bounding rectangle of the non-zero region with 1, randomly
    enlarging each side.

    Args:
        mask_area (np.ndarray): Original mask.
        expand_ratio (float, optional): Upper bound of the random enlargement
            of each side, as a fraction of the bounding-box height (top and
            bottom) or width (left and right). Defaults to 0.1 (10%).

    Returns:
        np.ndarray: New mask with the shape and dtype of ``mask_area``; the
        (enlarged, image-clipped) rectangle is 1 and everything else is 0.
        An all-zero input yields an all-zero mask.
    """
    boxed = np.zeros_like(mask_area)

    row_hits = np.where(np.any(mask_area, axis=1))[0]
    col_hits = np.where(np.any(mask_area, axis=0))[0]
    if row_hits.size == 0:
        # Nothing to box; indexing [0, -1] below would raise IndexError.
        return boxed

    top, bottom = row_hits[[0, -1]]
    left, right = col_hits[[0, -1]]

    # Largest allowed enlargement per side, in pixels.
    max_grow_rows = int((bottom - top + 1) * expand_ratio)
    max_grow_cols = int((right - left + 1) * expand_ratio)

    # Each side gets its own random enlargement, clipped to the image.
    top = max(0, top - np.random.randint(0, max_grow_rows + 1))
    bottom = min(
        mask_area.shape[0] - 1, bottom + np.random.randint(0, max_grow_rows + 1)
    )
    left = max(0, left - np.random.randint(0, max_grow_cols + 1))
    right = min(
        mask_area.shape[1] - 1, right + np.random.randint(0, max_grow_cols + 1)
    )

    boxed[top : bottom + 1, left : right + 1] = 1
    return boxed
|
|
|
|
|
|
|
|
def create_bounding_box_mask(
    mask_area: np.ndarray,
    strong_protect_area: np.ndarray,
    dilate_kernel: np.ndarray,
    horizon_expand: bool = False,
) -> np.ndarray:
    """Replace a mask by its bounding rectangle, minus the protected area.

    Args:
        mask_area (np.ndarray): Original mask; non-zero pixels are foreground.
        strong_protect_area (np.ndarray): 0/1 mask of pixels that must be
            removed from the rectangle.
        dilate_kernel (np.ndarray): Structuring element for the final dilation.
        horizon_expand (bool): If True, widen the rectangle to the left and to
            the right by independent random amounts of up to 20% of its width,
            clipped to the image.

    Returns:
        np.ndarray: The rectangle mask with the protected area removed and
        then dilated once (so the dilation can grow back slightly into the
        protected area). If ``mask_area`` has no non-zero pixel, ``mask_area``
        itself is returned unchanged.
    """
    coords = cv2.findNonZero(mask_area)
    if coords is None:
        return mask_area

    x, y, w, h = cv2.boundingRect(coords)

    if horizon_expand:
        img_width = mask_area.shape[1]
        left_expand = int(np.random.uniform(0.0, 0.2) * w)
        right_expand = int(np.random.uniform(0.0, 0.2) * w)

        # Clamp each edge on its own, so that a left expansion cut off by the
        # image border is not added to the right-hand side instead.
        left_edge = max(0, x - left_expand)
        right_edge = min(img_width, x + w + right_expand)
        x, w = left_edge, right_edge - left_edge

    rect_mask = np.zeros_like(mask_area)
    rect_mask[y : y + h, x : x + w] = 1

    rect_mask = rect_mask & (~strong_protect_area)
    return cv2.dilate(rect_mask, dilate_kernel, iterations=1)
|
|
|
|
|
|
|
|
def cloth_agnostic_mask(
    densepose_mask: np.ndarray,
    schp_lip_mask: np.ndarray,
    schp_atr_mask: np.ndarray,
    part: str = "overall",
    square_cloth_mask: bool = False,
    **kwargs,
) -> Image.Image:
    """Build the clothing-agnostic mask for one garment region.

    Steps: collect the areas that must be kept (hands/feet, face, hair,
    accessories, other garments and body parts of the region), mask
    everything else that is not background plus the dilated DensePose body
    parts of the region, keep the largest connected component, take its
    convex hull, remove the kept areas again and smooth the result.

    Args:
        densepose_mask: Part-index map, looked up through ``DENSE_INDEX_MAP``
            (presumably 2-D uint8 -- the OpenCV calls below require an
            OpenCV-supported dtype).
        schp_lip_mask: Parsing label map, looked up through ``LIP_MAPPING``.
        schp_atr_mask: Parsing label map, looked up through ``ATR_MAPPING``.
        part: One of "upper", "lower", "overall", "inner", "outer".
            "full" and "dresses" are accepted as aliases of "overall".
        square_cloth_mask: If True, replace the mask by its bounding rectangle
            (minus kept areas and the face).
        **kwargs: Accepted and ignored.

    Returns:
        PIL image of the mask with values 0 and 255.

    Raises:
        AssertionError: If ``part`` is not a supported region.
    """
    if part == "full" or part == "dresses":
        part = "overall"
    assert part in ["upper", "lower", "overall", "inner", "outer"], (
        f"part should be one of ['upper', 'lower', 'overall', 'inner', 'outer'], but got {part}"
    )
    # ndarray.shape is (rows, cols), i.e. (height, width).
    height, width = densepose_mask.shape[:2]
    longest_side = max(height, width)

    # Odd kernel sizes that scale with the image: ~1/500 of the longest side
    # for dilation, ~1/50 for Gaussian smoothing.
    dilate_size = longest_side // 500
    if dilate_size % 2 == 0:
        dilate_size += 1
    dilate_kernel = np.ones((dilate_size, dilate_size), np.uint8)

    blur_size = longest_side // 50
    if blur_size % 2 == 0:
        blur_size += 1

    # --- Strong protection: hands/feet that the parsers see as bare limbs, plus the face.
    limb_labels = ["Left-arm", "Right-arm", "Left-leg", "Right-leg"]
    hands_protect_area = part_mask_of(
        ["hands", "feet"], densepose_mask, DENSE_INDEX_MAP
    )
    hands_protect_area = cv2.dilate(hands_protect_area, dilate_kernel, iterations=1)
    hands_protect_area = hands_protect_area & (
        part_mask_of(limb_labels, schp_atr_mask, ATR_MAPPING)
        | part_mask_of(limb_labels, schp_lip_mask, LIP_MAPPING)
    )
    face_protect_area = part_mask_of("Face", schp_lip_mask, LIP_MAPPING)
    strong_protect_area = hands_protect_area | face_protect_area

    # --- Weak protection: everything else that must stay visible for this region.
    body_protect_area = part_mask_of(
        PROTECT_BODY_PARTS[part], schp_lip_mask, LIP_MAPPING
    ) | part_mask_of(PROTECT_BODY_PARTS[part], schp_atr_mask, ATR_MAPPING)
    hair_protect_area = part_mask_of(
        ["Hair"], schp_lip_mask, LIP_MAPPING
    ) | part_mask_of(["Hair"], schp_atr_mask, ATR_MAPPING)
    cloth_protect_area = part_mask_of(
        PROTECT_CLOTH_PARTS[part]["LIP"], schp_lip_mask, LIP_MAPPING
    ) | part_mask_of(PROTECT_CLOTH_PARTS[part]["ATR"], schp_atr_mask, ATR_MAPPING)
    accessory_protect_area = part_mask_of(
        PUBLIC_ACCESSORY_PARTS, schp_lip_mask, LIP_MAPPING
    ) | part_mask_of(PUBLIC_ACCESSORY_PARTS, schp_atr_mask, ATR_MAPPING)
    weak_protect_area = (
        body_protect_area
        | cloth_protect_area
        | hair_protect_area
        | strong_protect_area
        | accessory_protect_area
    )

    # --- Area to mask.
    # NOTE(review): a garment-label mask built from MASK_CLOTH_PARTS[part] used
    # to be computed here but was never merged into the result; the dead
    # computation was dropped. Confirm that omitting it is intended.
    # Background = pixels both parsers agree are background.
    background_area = part_mask_of(
        ["Background"], schp_lip_mask, LIP_MAPPING
    ) & part_mask_of(["Background"], schp_atr_mask, ATR_MAPPING)

    # Body parts of the region (plus feet), dilated at quarter resolution so
    # the small kernel has a 4x larger effective reach.
    mask_dense_area = part_mask_of(
        MASK_DENSE_PARTS[part] + ["right foot", "left foot"],
        densepose_mask,
        DENSE_INDEX_MAP,
    )
    mask_dense_area = cv2.resize(
        mask_dense_area.astype(np.uint8),
        None,
        fx=0.25,
        fy=0.25,
        interpolation=cv2.INTER_NEAREST,
    )
    mask_dense_area = cv2.dilate(mask_dense_area, dilate_kernel, iterations=2)
    # Resize back to the exact input size (cv2 takes (width, height)): scaling
    # by 4 would give a different shape when a side is not a multiple of 4.
    mask_dense_area = cv2.resize(
        mask_dense_area, (width, height), interpolation=cv2.INTER_NEAREST
    )

    mask_area = (
        np.ones_like(densepose_mask) & (~weak_protect_area) & (~background_area)
    ) | mask_dense_area

    # Smooth, then re-binarise.
    mask_area = cv2.GaussianBlur(mask_area * 255, (blur_size, blur_size), 0)
    mask_area[mask_area < 100] = 0
    mask_area[mask_area >= 100] = 1

    # With more than one foreground component, keep only the largest.
    num_labels, labels = cv2.connectedComponents(mask_area.astype(np.uint8))
    if num_labels > 2:
        label_counts = np.bincount(labels.flatten())
        label_counts[0] = 0  # label 0 is the background component
        largest_label = np.argmax(label_counts)
        mask_area = (labels == largest_label).astype(np.uint8)

    # Convex hull to close gaps, then carve the protected areas back out.
    mask_area = hull_mask(mask_area * 255) // 255
    mask_area = mask_area & (~weak_protect_area)
    mask_area = cv2.GaussianBlur(mask_area * 255, (blur_size, blur_size), 0)
    mask_area[mask_area < 25] = 0
    mask_area[mask_area >= 25] = 1

    if square_cloth_mask:
        # Only protected pixels outside the current mask are cut out of the rectangle.
        weak_protect_area_ = weak_protect_area & (~mask_area)
        mask_area = create_bounding_box_mask(
            mask_area, weak_protect_area_, dilate_kernel
        )
        mask_area = mask_area & ~face_protect_area

    # The original passed `dilate_kernel - 2` here, which is an all-255 array,
    # i.e. the same all-non-zero structuring element as `dilate_kernel`.
    mask_area = cv2.dilate(mask_area, dilate_kernel, iterations=2)

    return Image.fromarray(mask_area * 255)
|
|
|
|
|
|
|
|
def multi_ref_cloth_agnostic_mask(
    densepose_mask: np.ndarray,
    schp_lip_mask: np.ndarray,
    schp_atr_mask: np.ndarray,
    square_cloth_mask: bool = False,
    horizon_expand: bool = False,
    **kwargs,
) -> Image.Image:
    """Build a whole-outfit clothing-agnostic mask (garments, shoes, bag, ...).

    The mask is the union of all listed garment/accessory labels and the
    dilated DensePose body parts, with the face removed, followed by
    smoothing, a convex hull and a second smoothing pass.

    Args:
        densepose_mask: Part-index map, looked up through ``DENSE_INDEX_MAP``
            (presumably 2-D uint8 -- the OpenCV calls below require an
            OpenCV-supported dtype).
        schp_lip_mask: Parsing label map, looked up through ``LIP_MAPPING``.
        schp_atr_mask: Parsing label map, looked up through ``ATR_MAPPING``.
        square_cloth_mask: If True, replace the mask by its bounding rectangle
            (minus the face).
        horizon_expand: Forwarded to ``create_bounding_box_mask``; only has an
            effect when ``square_cloth_mask`` is True.
        **kwargs: Accepted and ignored.

    Returns:
        PIL image of the mask with values 0 and 255.
    """
    # ndarray.shape is (rows, cols), i.e. (height, width).
    height, width = densepose_mask.shape[:2]
    longest_side = max(height, width)

    # Odd kernel sizes that scale with the image: ~1/500 of the longest side
    # for dilation, ~1/50 for Gaussian smoothing.
    dilate_size = longest_side // 500
    if dilate_size % 2 == 0:
        dilate_size += 1
    dilate_kernel = np.ones((dilate_size, dilate_size), np.uint8)

    blur_size = longest_side // 50
    if blur_size % 2 == 0:
        blur_size += 1

    # Only the face is protected in this variant.
    face_protect_area = part_mask_of("Face", schp_lip_mask, LIP_MAPPING)
    strong_protect_area = face_protect_area

    # Every wearable label from either parser; names a label set does not
    # define are skipped by part_mask_of.
    mask_keys = [
        "Upper-clothes",
        "Dress",
        "Pants",
        "Skirt",
        "Coat",
        "Jumpsuits",
        "Left-shoe",
        "Right-shoe",
        "Bag",
        "Socks",
        "Belt",
    ]
    strong_mask_area = part_mask_of(
        mask_keys, schp_lip_mask, LIP_MAPPING
    ) | part_mask_of(mask_keys, schp_atr_mask, ATR_MAPPING)
    # The original passed `dilate_kernel // 2 + 1`, which evaluates to the
    # same all-ones array as `dilate_kernel`.
    strong_mask_area = cv2.dilate(strong_mask_area, dilate_kernel, iterations=1)

    # Body parts (plus feet), dilated at quarter resolution so the small
    # kernel has a 4x larger effective reach.
    mask_dense_area = part_mask_of(
        MASK_DENSE_PARTS["overall"] + ["right foot", "left foot"],
        densepose_mask,
        DENSE_INDEX_MAP,
    )
    mask_dense_area = cv2.resize(
        mask_dense_area.astype(np.uint8),
        None,
        fx=0.25,
        fy=0.25,
        interpolation=cv2.INTER_NEAREST,
    )
    mask_dense_area = cv2.dilate(mask_dense_area, dilate_kernel, iterations=2)
    # Resize back to the exact input size (cv2 takes (width, height)): scaling
    # by 4 would give a different shape when a side is not a multiple of 4.
    mask_dense_area = cv2.resize(
        mask_dense_area, (width, height), interpolation=cv2.INTER_NEAREST
    )

    mask_area = (strong_mask_area | mask_dense_area) & (~strong_protect_area)
    mask_area = cv2.dilate(mask_area, dilate_kernel, iterations=1)

    # Smooth, then re-binarise.
    mask_area = cv2.GaussianBlur(mask_area * 255, (blur_size, blur_size), 0)
    mask_area[mask_area < 100] = 0
    mask_area[mask_area >= 100] = 1

    # Convex hull to close gaps, then a second, more permissive smoothing pass.
    mask_area = hull_mask(mask_area * 255) // 255
    mask_area = cv2.GaussianBlur(mask_area * 255, (blur_size, blur_size), 0)
    mask_area[mask_area < 25] = 0
    mask_area[mask_area >= 25] = 1

    if square_cloth_mask:
        mask_area = create_bounding_box_mask(
            mask_area, strong_protect_area, dilate_kernel, horizon_expand
        )
        mask_area = mask_area & ~strong_protect_area

    # The original passed `dilate_kernel - 2` here, which is an all-255 array,
    # i.e. the same all-non-zero structuring element as `dilate_kernel`.
    mask_area = cv2.dilate(mask_area, dilate_kernel, iterations=2)

    return Image.fromarray(mask_area * 255)
|
|
|