File size: 7,107 Bytes
2b534de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import numpy as np
import random
from torchvision import transforms as T
from torch import Tensor
from PIL import Image
import torchvision, torch, cv2

def get_tensor(normalize=True, toTensor=True,
    mean = (0.5, 0.5, 0.5),
    std = (0.5, 0.5, 0.5),
):
    """Build a torchvision transform pipeline.

    Optionally converts a PIL image / ndarray to a float tensor in [0, 1]
    (ToTensor), then optionally normalizes each channel with the given
    per-channel mean and std. Returns a Compose of the selected steps.
    """
    steps = []
    if toTensor:
        steps.append(torchvision.transforms.ToTensor())
    if normalize:
        steps.append(torchvision.transforms.Normalize(mean, std))
    return torchvision.transforms.Compose(steps)

def get_tensor_clip(normalize=True, toTensor=True,
    mean = (0.48145466, 0.4578275, 0.40821073),
    std = (0.26862954, 0.26130258, 0.27577711),
):
    """Build a torchvision transform pipeline using CLIP normalization stats.

    Same as get_tensor, but the default mean/std are the CLIP preprocessing
    constants. Returns a Compose of the selected steps.
    """
    steps = []
    if toTensor:
        steps.append(torchvision.transforms.ToTensor())
    if normalize:
        steps.append(torchvision.transforms.Normalize(mean, std))
    return torchvision.transforms.Compose(steps)

def mask_after_npisin__2__tensor(mask_after_npisin: np.ndarray) -> Tensor:
    """Convert a boolean mask (e.g. the result of np.isin) to a 1x512x512 tensor.

    True positions become 255 in a uint8 grayscale image, which is then turned
    into a float tensor in [0, 1] and resized to 512x512.

    Args:
        mask_after_npisin: boolean numpy array of shape (H, W), used both for
            boolean indexing and to size the output buffer.

    Returns:
        Float tensor of shape (1, 512, 512) with values in [0, 1].
    """
    # BUG FIX: np.zeros_like(mask) inherits the input's bool dtype, and
    # PIL.Image.fromarray cannot handle bool arrays (raises TypeError).
    # Allocate an explicit uint8 buffer instead.
    converted_mask = np.zeros(mask_after_npisin.shape, dtype=np.uint8)
    converted_mask[mask_after_npisin] = 255
    mask_img = Image.fromarray(converted_mask).convert('L')
    mask_tensor = get_tensor(normalize=False, toTensor=True)(mask_img)
    mask_tensor = T.Resize([512, 512])(mask_tensor)
    return mask_tensor

# Implement perspective warp for reference images
def apply_perspective_warp(img, mask, deg_x, deg_y,
    # border_mode=cv2.BORDER_REPLICATE, interpolation=cv2.INTER_LINEAR, 
    border_mode=cv2.BORDER_CONSTANT, interpolation=cv2.INTER_CUBIC, 
    constant_border_value=(0,0,0),
    fix_edge_artifacts=False, # no noticeable difference
):
    """
    Apply a perspective warp transformation to an image and mask.

    The image plane is rotated in 3D around the X and Y axes and re-projected
    onto the original (H, W) canvas with a simple pinhole-style projection.

    Args:
        img: numpy array of shape (H, W, C)
        mask: numpy array of shape (H, W)
        deg_x: rotation around the X axis (tilts the image vertically), degrees
        deg_y: rotation around the Y axis (tilts the image horizontally), degrees
        border_mode: border handling mode (cv2.BORDER_REPLICATE, cv2.BORDER_CONSTANT, etc.)
        interpolation: interpolation method (cv2.INTER_LINEAR, cv2.INTER_CUBIC, etc.)
        constant_border_value: border color to use with BORDER_CONSTANT
        fix_edge_artifacts: Whether to apply additional blending to soften edge artifacts

    Returns:
        transformed_img, transformed_mask (both uint8)
    """
    h, w = img.shape[:2]
    assert img.shape[:2] == mask.shape[:2], f"img shape {img.shape[:2]} != mask shape {mask.shape[:2]}"

    # Convert degrees to radians
    rad_x = np.deg2rad(deg_x)
    rad_y = np.deg2rad(deg_y)

    # Virtual camera distance: chosen so the image diagonal subtends ~45 degrees.
    d = np.sqrt(h**2 + w**2)
    eye_to_center = d / (2 * np.tan(np.pi/8))  # approx distance from eye to image center

    # Compose the 3D rotation: first around X (vertical tilt) ...
    transform = np.eye(3)
    transform = transform @ np.array([
        [1, 0, 0],
        [0, np.cos(rad_x), -np.sin(rad_x)],
        [0, np.sin(rad_x), np.cos(rad_x)]
    ])

    # ... then around Y (horizontal tilt)
    transform = transform @ np.array([
        [np.cos(rad_y), 0, np.sin(rad_y)],
        [0, 1, 0],
        [-np.sin(rad_y), 0, np.cos(rad_y)]
    ])

    # Image corners, centered on the origin, on the z=0 plane
    pts_3d = np.array([
        [-w/2, -h/2, 0],
        [w/2, -h/2, 0],
        [w/2, h/2, 0],
        [-w/2, h/2, 0]
    ])

    # Rotate the corners in 3D
    pts_3d_transformed = pts_3d @ transform.T

    # Perspective-project back to 2D and shift to image coordinates
    pts_3d_transformed[:, 0] = pts_3d_transformed[:, 0] * eye_to_center / (eye_to_center + pts_3d_transformed[:, 2]) + w/2
    pts_3d_transformed[:, 1] = pts_3d_transformed[:, 1] * eye_to_center / (eye_to_center + pts_3d_transformed[:, 2]) + h/2

    src_pts = np.array([
        [0, 0],
        [w-1, 0],
        [w-1, h-1],
        [0, h-1]
    ], dtype=np.float32)

    dst_pts = np.array([
        [pts_3d_transformed[0, 0], pts_3d_transformed[0, 1]],
        [pts_3d_transformed[1, 0], pts_3d_transformed[1, 1]],
        [pts_3d_transformed[2, 0], pts_3d_transformed[2, 1]],
        [pts_3d_transformed[3, 0], pts_3d_transformed[3, 1]]
    ], dtype=np.float32)

    # Get perspective transform matrix
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)

    # Apply perspective transformation with specified border mode and interpolation
    transformed_img = cv2.warpPerspective(img, M, (w, h), flags=interpolation,
                                          borderMode=border_mode,
                                          borderValue=constant_border_value)

    # For mask, always use nearest neighbor interpolation
    transformed_mask = cv2.warpPerspective(mask, M, (w, h), flags=cv2.INTER_NEAREST,
                                           borderMode=border_mode,
                                           borderValue=0)

    # Additional processing to soften edge artifacts
    if fix_edge_artifacts:
        # Warp a full-coverage mask to find where the source image landed.
        # BUG FIX: the coverage mask must hold 255 (not 1) so that dividing by
        # 255.0 below yields a blend weight of 1.0 inside the transition band;
        # with ones the weight was ~1/255 and the blending was a near no-op
        # (likely why "no noticeable difference" was observed).
        edge_mask = np.full((h, w), 255, dtype=np.uint8)
        warped_edge_mask = cv2.warpPerspective(edge_mask, M, (w, h), flags=cv2.INTER_NEAREST,
                                               borderMode=cv2.BORDER_CONSTANT, borderValue=0)

        # Transition band = covered pixels minus an eroded interior
        kernel = np.ones((7, 7), np.uint8)
        inner_edge = cv2.erode(warped_edge_mask, kernel)
        transition_mask = warped_edge_mask - inner_edge

        # Only focus on vertical edges where artifacts are most common
        left_margin = 20
        right_margin = 20
        vertical_edge_mask = np.zeros_like(transition_mask)
        vertical_edge_mask[:, :left_margin] = transition_mask[:, :left_margin]
        vertical_edge_mask[:, -right_margin:] = transition_mask[:, -right_margin:]

        if len(transformed_img.shape) == 3:
            # BUG FIX: the original looped `for i in range(3)` without using i,
            # re-blurring and re-blending the already-blended image three times;
            # a single blend is the intended operation.
            if np.sum(vertical_edge_mask) > 0:
                # Apply a stronger blur to vertical edges
                blurred = cv2.GaussianBlur(transformed_img, (9, 9), 0)
                vertical_edge_mask_3d = np.stack([vertical_edge_mask] * 3, axis=2) / 255.0
                transformed_img = transformed_img * (1 - vertical_edge_mask_3d) + blurred * vertical_edge_mask_3d

            # Apply general edge blending as well
            edge_blurred = cv2.GaussianBlur(transformed_img, (3, 3), 0)
            transition_mask_3d = np.stack([transition_mask] * 3, axis=2) / 255.0
            transformed_img = transformed_img * (1 - transition_mask_3d) + edge_blurred * transition_mask_3d

    # Ensure the output image is uint8
    if transformed_img.dtype != np.uint8:
        transformed_img = np.clip(transformed_img, 0, 255).astype(np.uint8)

    # Ensure the output mask is uint8
    if transformed_mask.dtype != np.uint8:
        transformed_mask = np.clip(transformed_mask, 0, 255).astype(np.uint8)

    return transformed_img, transformed_mask