import cv2
import numpy as np

import torchvision.transforms as transforms

# Inverse of the standard ImageNet normalization, used to recover a displayable image
inv_im_trans = transforms.Normalize(
    mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
    std=[1/0.229, 1/0.224, 1/0.225])

# Inverse of the (0.5, 0.5) normalization applied to the input segmentation
inv_seg_trans = transforms.Normalize(
    mean=[-0.5/0.5],
    std=[1/0.5])
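
# Note: composing Normalize(mean=m, std=s) with Normalize(mean=-m/s, std=1/s)
# recovers the original values, since ((x - m)/s - (-m/s)) / (1/s) = x.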

def tensor_to_numpy(image):
    # [0, 1] float tensor -> [0, 255] uint8 array
    image_np = (image.numpy() * 255).astype('uint8')
    return image_np

def tensor_to_np_float(image):
    image_np = image.numpy().astype('float32')
    return image_np

def detach_to_cpu(x):
    return x.detach().cpu()

def transpose_np(x):
    # CHW -> HWC
    return np.transpose(x, [1,2,0])

def tensor_to_gray_im(x):
    # Single-channel tensor -> uint8 HWC image, without de-normalization
    x = detach_to_cpu(x)
    x = tensor_to_numpy(x)
    x = transpose_np(x)
    return x

def tensor_to_seg(x):
    # Segmentation tensor -> uint8 HWC image, undoing the 0.5/0.5 normalization
    x = detach_to_cpu(x)
    x = inv_seg_trans(x)
    x = tensor_to_numpy(x)
    x = transpose_np(x)
    return x

def tensor_to_im(x):
    # RGB image tensor -> uint8 HWC image, undoing the ImageNet normalization
    x = detach_to_cpu(x)
    x = inv_im_trans(x)
    x = tensor_to_numpy(x)
    x = transpose_np(x)
    return x

# Predefined key <-> caption dict
key_captions = {
    'im': 'Image', 
    'gt': 'GT', 
    'seg': 'Input', 
    'error_map': 'Error map',
}
for k in ['28', '56', '224']:
    key_captions['pred_' + k] = 'Ours-%sx%s' % (k, k)
    key_captions['pred_' + k + '_overlay'] = '%sx%s' % (k, k)

"""
Return an image array with captions
keys in dictionary will be used as caption if not provided
values should contain lists of cv2 images
"""
def get_image_array(images, grid_shape, captions={}):
    w, h = grid_shape
    cate_counts = len(images)                       # one column per key
    rows_counts = len(next(iter(images.values())))  # number of image rows

    font = cv2.FONT_HERSHEY_SIMPLEX

    # The extra (+1) row at the top is reserved for the caption text
    output_image = np.zeros([h*(rows_counts+1), w*cate_counts, 3], dtype=np.uint8)
    col_cnt = 0
    for k, v in images.items():

        # Default caption is the key itself
        caption = captions.get(k, k)

        # Handle newline characters in the caption
        y0, dy = h-10-len(caption.split('\n'))*40, 40
        for i, line in enumerate(caption.split('\n')):
            y = y0 + i*dy
            cv2.putText(output_image, line, (col_cnt*w, y),
                     font, 0.8, (255,255,255), 2, cv2.LINE_AA)

        # Put images
        for row_cnt, img in enumerate(v):
            im_shape = img.shape
            if len(im_shape) == 2:
                img = img[..., np.newaxis]

            # Images arrive as [0, 1] floats; convert to uint8 for display
            img = (img * 255).astype('uint8')

            output_image[(row_cnt+1)*h:(row_cnt+2)*h,
                         col_cnt*w:(col_cnt+1)*w, :] = img
            
        col_cnt += 1

    return output_image
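
# For reference, a hedged illustration of calling get_image_array directly
# (the variable names are hypothetical): given lists of 224x224, HWC, float
# [0, 1] images,
#   grid = get_image_array({'im': im_list, 'gt': gt_list}, (224, 224), key_captions)
# returns a uint8 grid with one column per key and a caption band on top.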

"""
Create an image array, transform each image separately as needed
Will only put images in req_keys
"""
def pool_images(images, req_keys, row_cnt=10):
    req_images = {}

    def base_transform(im):
        im = tensor_to_np_float(im)
        im = im.transpose((1, 2, 0))

        # Resize
        if im.shape[1] != 224:
            im = cv2.resize(im, (224, 224), interpolation=cv2.INTER_NEAREST)

        if len(im.shape) == 2:
            im = im[..., np.newaxis]

        return im

    second_pass_keys = []
    for k in req_keys:

        if 'overlay' in k: 
            # Run overlay in the second pass, skip for now
            second_pass_keys.append(k)

            # Make sure the base key information is transformed
            base_key = k.replace('_overlay', '')
            if base_key in req_keys:
                continue
            else:
                k = base_key

        req_images[k] = []

        images[k] = detach_to_cpu(images[k])
        for i in range(min(row_cnt, len(images[k]))):

            im = images[k][i]

            # Handles inverse transform
            if k in ['im']:
                im = inv_im_trans(images[k][i])
            elif k in ['seg']:
                im = inv_seg_trans(images[k][i])

            # Convert to a numpy HWC array (resized to 224x224 if needed)
            im = base_transform(im)

            req_images[k].append(im)

    # Handle overlay images in the second pass
    for k in second_pass_keys:
        req_images[k] = []
        base_key = k.replace('_overlay', '')
        for i in range(min(row_cnt, len(images[base_key]))):

            im = req_images[base_key][i]
            raw = req_images['im'][i]

            im = im.clip(0, 1)

            # Blend a red overlay onto the raw image, weighted by the prediction
            im = (raw*0.5 + 0.5 * (raw * (1-im)
                    + im * (np.array([1,0,0],dtype=np.float32)
                    .reshape([1,1,3]))))
            
            req_images[k].append(im)
    
    # Keep only the requested keys (drops temporary base-key entries)
    output_images = {}
    for k in req_keys:
        output_images[k] = req_images[k]

    return get_image_array(output_images, (224, 224), key_captions)

# Return cv2 image, directly usable for saving
def vis_prediction(images):

    keys = ['im', 'seg', 'gt', 'pred_224', 'pred_224_overlay'] # 'pred_28', 'pred_28_2', 'pred_56', 'pred_28_3', 'pred_56_2', 

    return pool_images(images, keys)
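
# --- Usage sketch (not part of the original module) --------------------------
# A minimal, hedged example of how these helpers fit together. The batch size,
# key names and tensor shapes below are assumptions inferred from how
# pool_images/vis_prediction index their inputs (batched CHW tensors keyed by
# 'im', 'seg', 'gt', 'pred_224', ...); swap in real network outputs as needed.
if __name__ == '__main__':
    import torch

    b = 4  # hypothetical batch size
    dummy = {
        'im':       torch.rand(b, 3, 224, 224),  # normalized RGB input
        'seg':      torch.rand(b, 1, 224, 224),  # normalized input segmentation
        'gt':       torch.rand(b, 1, 224, 224),  # ground-truth mask in [0, 1]
        'pred_224': torch.rand(b, 1, 224, 224),  # predicted mask in [0, 1]
    }

    grid = vis_prediction(dummy)
    cv2.imwrite('vis_example.png', grid)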