File size: 7,107 Bytes
2b534de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import numpy as np
import random
from torchvision import transforms as T
from torch import Tensor
from PIL import Image
import torchvision, torch, cv2

def get_tensor(normalize=True, toTensor=True,
    mean = (0.5, 0.5, 0.5),
    std = (0.5, 0.5, 0.5),
):
    """Build a torchvision transform pipeline.

    Optionally converts a PIL image / ndarray to a float tensor in [0, 1]
    (ToTensor), then optionally normalizes each channel with the given
    per-channel mean and std. Returns a Compose of the selected steps.
    """
    steps = []
    if toTensor:
        steps.append(torchvision.transforms.ToTensor())
    if normalize:
        steps.append(torchvision.transforms.Normalize(mean, std))
    return torchvision.transforms.Compose(steps)

def get_tensor_clip(normalize=True, toTensor=True,
    mean = (0.48145466, 0.4578275, 0.40821073),
    std = (0.26862954, 0.26130258, 0.27577711),
):
    """Build a torchvision transform pipeline using CLIP normalization stats.

    Same as get_tensor, but the default mean/std are the CLIP preprocessing
    constants. Returns a Compose of the selected steps.
    """
    steps = []
    if toTensor:
        steps.append(torchvision.transforms.ToTensor())
    if normalize:
        steps.append(torchvision.transforms.Normalize(mean, std))
    return torchvision.transforms.Compose(steps)

def mask_after_npisin__2__tensor(mask_after_npisin: np.ndarray) -> Tensor:
    """Convert a boolean mask (e.g. the result of np.isin) to a 1x512x512 tensor.

    True positions become 255 in a uint8 grayscale image, which is then turned
    into a float tensor in [0, 1] and resized to 512x512.

    Args:
        mask_after_npisin: boolean numpy array of shape (H, W), used both for
            boolean indexing and to size the output buffer.

    Returns:
        Float tensor of shape (1, 512, 512) with values in [0, 1].
    """
    # BUG FIX: np.zeros_like(mask) inherits the input's bool dtype, and
    # PIL.Image.fromarray cannot handle bool arrays (raises TypeError).
    # Allocate an explicit uint8 buffer instead.
    converted_mask = np.zeros(mask_after_npisin.shape, dtype=np.uint8)
    converted_mask[mask_after_npisin] = 255
    mask_img = Image.fromarray(converted_mask).convert('L')
    mask_tensor = get_tensor(normalize=False, toTensor=True)(mask_img)
    mask_tensor = T.Resize([512, 512])(mask_tensor)
    return mask_tensor

# Implement perspective warp for reference images
def apply_perspective_warp(img, mask, deg_x, deg_y,
    # border_mode=cv2.BORDER_REPLICATE, interpolation=cv2.INTER_LINEAR, 
    border_mode=cv2.BORDER_CONSTANT, interpolation=cv2.INTER_CUBIC, 
    constant_border_value=(0,0,0),
    fix_edge_artifacts=False, # no noticeable difference
):
    """
    Apply a perspective warp transformation to an image and mask.

    The image plane is rotated in 3D around the X and Y axes and re-projected
    onto the original (H, W) canvas with a simple pinhole-style projection.

    Args:
        img: numpy array of shape (H, W, C)
        mask: numpy array of shape (H, W)
        deg_x: rotation around the X axis (tilts the image vertically), degrees
        deg_y: rotation around the Y axis (tilts the image horizontally), degrees
        border_mode: border handling mode (cv2.BORDER_REPLICATE, cv2.BORDER_CONSTANT, etc.)
        interpolation: interpolation method (cv2.INTER_LINEAR, cv2.INTER_CUBIC, etc.)
        constant_border_value: border color to use with BORDER_CONSTANT
        fix_edge_artifacts: Whether to apply additional blending to soften edge artifacts

    Returns:
        transformed_img, transformed_mask (both uint8)
    """
    h, w = img.shape[:2]
    assert img.shape[:2] == mask.shape[:2], f"img shape {img.shape[:2]} != mask shape {mask.shape[:2]}"

    # Convert degrees to radians
    rad_x = np.deg2rad(deg_x)
    rad_y = np.deg2rad(deg_y)

    # Virtual camera distance: chosen so the image diagonal subtends ~45 degrees.
    d = np.sqrt(h**2 + w**2)
    eye_to_center = d / (2 * np.tan(np.pi/8))  # approx distance from eye to image center

    # Compose the 3D rotation: first around X (vertical tilt) ...
    transform = np.eye(3)
    transform = transform @ np.array([
        [1, 0, 0],
        [0, np.cos(rad_x), -np.sin(rad_x)],
        [0, np.sin(rad_x), np.cos(rad_x)]
    ])

    # ... then around Y (horizontal tilt)
    transform = transform @ np.array([
        [np.cos(rad_y), 0, np.sin(rad_y)],
        [0, 1, 0],
        [-np.sin(rad_y), 0, np.cos(rad_y)]
    ])

    # Image corners, centered on the origin, on the z=0 plane
    pts_3d = np.array([
        [-w/2, -h/2, 0],
        [w/2, -h/2, 0],
        [w/2, h/2, 0],
        [-w/2, h/2, 0]
    ])

    # Rotate the corners in 3D
    pts_3d_transformed = pts_3d @ transform.T

    # Perspective-project back to 2D and shift to image coordinates
    pts_3d_transformed[:, 0] = pts_3d_transformed[:, 0] * eye_to_center / (eye_to_center + pts_3d_transformed[:, 2]) + w/2
    pts_3d_transformed[:, 1] = pts_3d_transformed[:, 1] * eye_to_center / (eye_to_center + pts_3d_transformed[:, 2]) + h/2

    src_pts = np.array([
        [0, 0],
        [w-1, 0],
        [w-1, h-1],
        [0, h-1]
    ], dtype=np.float32)

    dst_pts = np.array([
        [pts_3d_transformed[0, 0], pts_3d_transformed[0, 1]],
        [pts_3d_transformed[1, 0], pts_3d_transformed[1, 1]],
        [pts_3d_transformed[2, 0], pts_3d_transformed[2, 1]],
        [pts_3d_transformed[3, 0], pts_3d_transformed[3, 1]]
    ], dtype=np.float32)

    # Get perspective transform matrix
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)

    # Apply perspective transformation with specified border mode and interpolation
    transformed_img = cv2.warpPerspective(img, M, (w, h), flags=interpolation,
                                          borderMode=border_mode,
                                          borderValue=constant_border_value)

    # For mask, always use nearest neighbor interpolation
    transformed_mask = cv2.warpPerspective(mask, M, (w, h), flags=cv2.INTER_NEAREST,
                                           borderMode=border_mode,
                                           borderValue=0)

    # Additional processing to soften edge artifacts
    if fix_edge_artifacts:
        # Warp a full-coverage mask to find where the source image landed.
        # BUG FIX: the coverage mask must hold 255 (not 1) so that dividing by
        # 255.0 below yields a blend weight of 1.0 inside the transition band;
        # with ones the weight was ~1/255 and the blending was a near no-op
        # (likely why "no noticeable difference" was observed).
        edge_mask = np.full((h, w), 255, dtype=np.uint8)
        warped_edge_mask = cv2.warpPerspective(edge_mask, M, (w, h), flags=cv2.INTER_NEAREST,
                                               borderMode=cv2.BORDER_CONSTANT, borderValue=0)

        # Transition band = covered pixels minus an eroded interior
        kernel = np.ones((7, 7), np.uint8)
        inner_edge = cv2.erode(warped_edge_mask, kernel)
        transition_mask = warped_edge_mask - inner_edge

        # Only focus on vertical edges where artifacts are most common
        left_margin = 20
        right_margin = 20
        vertical_edge_mask = np.zeros_like(transition_mask)
        vertical_edge_mask[:, :left_margin] = transition_mask[:, :left_margin]
        vertical_edge_mask[:, -right_margin:] = transition_mask[:, -right_margin:]

        if len(transformed_img.shape) == 3:
            # BUG FIX: the original looped `for i in range(3)` without using i,
            # re-blurring and re-blending the already-blended image three times;
            # a single blend is the intended operation.
            if np.sum(vertical_edge_mask) > 0:
                # Apply a stronger blur to vertical edges
                blurred = cv2.GaussianBlur(transformed_img, (9, 9), 0)
                vertical_edge_mask_3d = np.stack([vertical_edge_mask] * 3, axis=2) / 255.0
                transformed_img = transformed_img * (1 - vertical_edge_mask_3d) + blurred * vertical_edge_mask_3d

            # Apply general edge blending as well
            edge_blurred = cv2.GaussianBlur(transformed_img, (3, 3), 0)
            transition_mask_3d = np.stack([transition_mask] * 3, axis=2) / 255.0
            transformed_img = transformed_img * (1 - transition_mask_3d) + edge_blurred * transition_mask_3d

    # Ensure the output image is uint8
    if transformed_img.dtype != np.uint8:
        transformed_img = np.clip(transformed_img, 0, 255).astype(np.uint8)

    # Ensure the output mask is uint8
    if transformed_mask.dtype != np.uint8:
        transformed_mask = np.clip(transformed_mask, 0, 255).astype(np.uint8)

    return transformed_img, transformed_mask