import numpy as np
import random
from torchvision import transforms as T
from torch import Tensor
from PIL import Image
import torchvision, torch, cv2
def get_tensor(normalize=True, toTensor=True,
               mean=(0.5, 0.5, 0.5),
               std=(0.5, 0.5, 0.5),
               ):
    """Build a torchvision transform pipeline.

    Args:
        normalize: if True, append Normalize(mean, std) to the pipeline.
        toTensor: if True, prepend ToTensor (PIL/ndarray -> float tensor in [0, 1]).
        mean: per-channel means used by Normalize.
        std: per-channel standard deviations used by Normalize.
    Returns:
        A torchvision.transforms.Compose of the selected steps.
    """
    ops = []
    if toTensor:
        ops.append(torchvision.transforms.ToTensor())
    if normalize:
        ops.append(torchvision.transforms.Normalize(mean, std))
    return torchvision.transforms.Compose(ops)
def get_tensor_clip(normalize=True, toTensor=True,
                    mean=(0.48145466, 0.4578275, 0.40821073),
                    std=(0.26862954, 0.26130258, 0.27577711),
                    ):
    """Build the same transform pipeline as get_tensor, but with the CLIP
    image-preprocessing normalization statistics as defaults.

    Args:
        normalize: if True, include Normalize(mean, std).
        toTensor: if True, include ToTensor.
        mean: per-channel means (CLIP defaults).
        std: per-channel standard deviations (CLIP defaults).
    Returns:
        A torchvision.transforms.Compose of the selected steps.
    """
    # Delegate to get_tensor: the two functions previously duplicated the
    # pipeline construction line-for-line, differing only in defaults.
    return get_tensor(normalize=normalize, toTensor=toTensor, mean=mean, std=std)
def mask_after_npisin__2__tensor(mask_after_npisin: np.ndarray) -> Tensor:
    """Convert a boolean selection mask (e.g. the output of np.isin) into a
    1x512x512 float mask tensor with values in [0, 1].

    Args:
        mask_after_npisin: boolean array of shape (H, W); True marks selected pixels.
    Returns:
        Tensor of shape (1, 512, 512); selected pixels ~1.0, the rest 0.0.
    """
    # Bug fix: np.zeros_like on a bool input produced a bool array, which
    # PIL's Image.fromarray cannot handle. Allocate an explicit uint8 buffer.
    converted_mask = np.zeros(mask_after_npisin.shape, dtype=np.uint8)
    converted_mask[mask_after_npisin] = 255
    mask_img = Image.fromarray(converted_mask).convert('L')
    # ToTensor scales 0/255 down to 0.0/1.0.
    mask_tensor = get_tensor(normalize=False, toTensor=True)(mask_img)
    mask_tensor = T.Resize([512, 512])(mask_tensor)
    return mask_tensor
# Implement perspective warp for reference images
def _blend_warp_edges(warped_img, M, w, h, left_margin=20, right_margin=20):
    """Blur-blend the border of a perspective-warped image to soften edge artifacts.

    Args:
        warped_img: warped image, (H, W, C) or (H, W).
        M: the 3x3 perspective matrix that produced warped_img.
        w, h: output width and height.
        left_margin, right_margin: column bands treated as "vertical edges",
            where artifacts are most visible and get the stronger blur.
    Returns:
        The blended image (float64 when blending was applied; caller re-quantizes).
    """
    # Warp a full-coverage 255 mask to locate the valid region's border.
    # NOTE: the previous implementation warped a 0/1 mask and then divided
    # by 255.0, making every blend weight <= 1/255 -- effectively a no-op.
    coverage = np.full((h, w), 255, dtype=np.uint8)
    warped_cov = cv2.warpPerspective(coverage, M, (w, h), flags=cv2.INTER_NEAREST,
                                     borderMode=cv2.BORDER_CONSTANT, borderValue=0)
    # Transition band: pixels of the valid region within ~3px of its border.
    kernel = np.ones((7, 7), np.uint8)
    transition = warped_cov - cv2.erode(warped_cov, kernel)
    # Restrict the strong blur to the left/right column bands.
    vertical = np.zeros_like(transition)
    vertical[:, :left_margin] = transition[:, :left_margin]
    vertical[:, -right_margin:] = transition[:, -right_margin:]
    if warped_img.ndim != 3:
        # Grayscale input received no blending in the original code either.
        return warped_img
    out = warped_img.astype(np.float64)
    if np.any(vertical):
        # Strong blur on vertical edges, applied exactly once (the previous
        # code re-applied the identical full-image blend three times).
        strong = cv2.GaussianBlur(warped_img, (9, 9), 0)
        w3 = np.stack([vertical] * 3, axis=2) / 255.0
        out = out * (1 - w3) + strong * w3
    # Gentler blur blended over the whole transition band.
    soft = cv2.GaussianBlur(out, (3, 3), 0)
    t3 = np.stack([transition] * 3, axis=2) / 255.0
    out = out * (1 - t3) + soft * t3
    return out


def apply_perspective_warp(img, mask, deg_x, deg_y,
                           border_mode=cv2.BORDER_CONSTANT,
                           interpolation=cv2.INTER_CUBIC,
                           constant_border_value=(0, 0, 0),
                           fix_edge_artifacts=False,
                           ):
    """
    Apply a 3D perspective warp (rotation about the X and Y axes) to an image
    and its mask.

    Args:
        img: numpy array of shape (H, W, C)
        mask: numpy array of shape (H, W)
        deg_x: rotation about the X axis (vertical tilt), in degrees
        deg_y: rotation about the Y axis (horizontal pan), in degrees
        border_mode: border handling mode (cv2.BORDER_REPLICATE, cv2.BORDER_CONSTANT, ...)
        interpolation: interpolation for the image (cv2.INTER_LINEAR, cv2.INTER_CUBIC, ...)
        constant_border_value: border color used with cv2.BORDER_CONSTANT
        fix_edge_artifacts: if True, blur-blend the warped region's border to
            soften interpolation artifacts
    Returns:
        (transformed_img, transformed_mask), both uint8; the mask is warped
        with nearest-neighbor interpolation so label values stay exact.
    """
    h, w = img.shape[:2]
    assert img.shape[:2] == mask.shape[:2], f"img shape {img.shape[:2]} != mask shape {mask.shape[:2]}"
    rad_x = np.deg2rad(deg_x)
    rad_y = np.deg2rad(deg_y)
    # Approximate eye-to-image-plane distance from the diagonal (~45 deg FOV).
    d = np.sqrt(h**2 + w**2)
    eye_to_center = d / (2 * np.tan(np.pi / 8))
    # Compose the rotation: X axis (tilt) then Y axis (pan).
    rot_x = np.array([
        [1, 0, 0],
        [0, np.cos(rad_x), -np.sin(rad_x)],
        [0, np.sin(rad_x), np.cos(rad_x)],
    ])
    rot_y = np.array([
        [np.cos(rad_y), 0, np.sin(rad_y)],
        [0, 1, 0],
        [-np.sin(rad_y), 0, np.cos(rad_y)],
    ])
    transform = rot_x @ rot_y
    # Image corners centered on the origin, rotated in 3D.
    corners = np.array([
        [-w / 2, -h / 2, 0],
        [w / 2, -h / 2, 0],
        [w / 2, h / 2, 0],
        [-w / 2, h / 2, 0],
    ])
    rotated = corners @ transform.T
    # Perspective-project back onto the 2D image plane.
    scale = eye_to_center / (eye_to_center + rotated[:, 2])
    dst_pts = np.stack([rotated[:, 0] * scale + w / 2,
                        rotated[:, 1] * scale + h / 2], axis=1).astype(np.float32)
    src_pts = np.array([
        [0, 0],
        [w - 1, 0],
        [w - 1, h - 1],
        [0, h - 1],
    ], dtype=np.float32)
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    transformed_img = cv2.warpPerspective(img, M, (w, h), flags=interpolation,
                                          borderMode=border_mode,
                                          borderValue=constant_border_value)
    # Nearest neighbor for the mask so warped values remain valid labels.
    transformed_mask = cv2.warpPerspective(mask, M, (w, h), flags=cv2.INTER_NEAREST,
                                           borderMode=border_mode,
                                           borderValue=0)
    if fix_edge_artifacts:
        transformed_img = _blend_warp_edges(transformed_img, M, w, h)
    # Re-quantize: blending promotes the image to float.
    if transformed_img.dtype != np.uint8:
        transformed_img = np.clip(transformed_img, 0, 255).astype(np.uint8)
    if transformed_mask.dtype != np.uint8:
        transformed_mask = np.clip(transformed_mask, 0, 255).astype(np.uint8)
    return transformed_img, transformed_mask