# (non-code page residue commented out to keep the file parsable)
# drozdgk's picture
# chore: vendor third_party (remove submodules, ignore artifacts)
# 352cafd
import cv2
import numpy as np
import torchvision.transforms as transforms
# Inverse of the standard ImageNet normalization (mean/std per RGB channel):
# maps a normalized image tensor back to approximately [0, 1] values.
inv_im_trans = transforms.Normalize(
    mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
    std=[1/0.229, 1/0.224, 1/0.225])
# Inverse of a single-channel (mean=0.5, std=0.5) normalization used for
# segmentation inputs: maps [-1, 1] back to [0, 1].
inv_seg_trans = transforms.Normalize(
    mean=[-0.5/0.5],
    std=[1/0.5])
def tensor_to_numpy(image):
    """Convert a CPU tensor with values in [0, 1] to a uint8 numpy array in [0, 255]."""
    return (image.numpy() * 255).astype('uint8')
def tensor_to_np_float(image):
    """Convert a CPU tensor to a float32 numpy array."""
    as_np = image.numpy()
    return as_np.astype('float32')
def detach_to_cpu(x):
    """Detach *x* from the autograd graph and move it to host memory."""
    detached = x.detach()
    return detached.cpu()
def transpose_np(x):
    """Reorder a CHW array to HWC layout."""
    return np.transpose(x, (1, 2, 0))
def tensor_to_gray_im(x):
    """Convert a (C, H, W) tensor in [0, 1] to a uint8 HWC numpy image."""
    arr = x.detach().cpu().numpy()
    arr = (arr * 255).astype('uint8')
    return np.transpose(arr, (1, 2, 0))
def tensor_to_seg(x):
    """Undo the 0.5/0.5 segmentation normalization and return a uint8 HWC image."""
    seg = inv_seg_trans(detach_to_cpu(x))
    return transpose_np(tensor_to_numpy(seg))
def tensor_to_im(x):
    """Undo the ImageNet normalization and return a uint8 HWC image."""
    im = inv_im_trans(detach_to_cpu(x))
    return transpose_np(tensor_to_numpy(im))
# Predefined key <-> caption dict
key_captions = {
    'im': 'Image',
    'gt': 'GT',
    'seg': 'Input',
    'error_map': 'Error map',
}
# Add prediction captions for every output resolution.
for _res in ('28', '56', '224'):
    key_captions[f'pred_{_res}'] = f'Ours-{_res}x{_res}'
    key_captions[f'pred_{_res}_overlay'] = f'{_res}x{_res}'
"""
Return an image array with captions
keys in dictionary will be used as caption if not provided
values should contain lists of cv2 images
"""
def get_image_array(images, grid_shape, captions={}):
w, h = grid_shape
cate_counts = len(images)
rows_counts = len(next(iter(images.values())))
font = cv2.FONT_HERSHEY_SIMPLEX
output_image = np.zeros([h*(rows_counts+1), w*cate_counts, 3], dtype=np.uint8)
col_cnt = 0
for k, v in images.items():
# Default as key value itself
caption = captions.get(k, k)
# Handles new line character
y0, dy = h-10-len(caption.split('\n'))*40, 40
for i, line in enumerate(caption.split('\n')):
y = y0 + i*dy
cv2.putText(output_image, line, (col_cnt*w, y),
font, 0.8, (255,255,255), 2, cv2.LINE_AA)
# Put images
for row_cnt, img in enumerate(v):
im_shape = img.shape
if len(im_shape) == 2:
img = img[..., np.newaxis]
img = (img * 255).astype('uint8')
output_image[(row_cnt+1)*h:(row_cnt+2)*h,
col_cnt*w:(col_cnt+1)*w, :] = img
col_cnt += 1
return output_image
"""
Create an image array, transform each image separately as needed
Will only put images in req_keys
"""
def pool_images(images, req_keys, row_cnt=10):
    """
    Create an image array, transform each image separately as needed.
    Will only put images in req_keys.

    images: dict of key -> batched tensor; NOTE(review): entries processed
        here are replaced in-place with their detached CPU copies (side
        effect on the caller's dict).
    req_keys: keys to render, in column order. A key of the form
        '<base>_overlay' is drawn in a second pass as a red overlay of
        req_images[<base>] on top of req_images['im'].
    row_cnt: maximum number of batch items (rows) to show.
    """
    req_images = {}
    # Convert a (C, H, W) tensor to a 224x224 float32 HWC numpy array.
    def base_transform(im):
        im = tensor_to_np_float(im)
        im = im.transpose((1, 2, 0))
        # Resize
        # NOTE(review): only the width is compared to 224; assumes square
        # inputs — confirm.
        if im.shape[1] != 224:
            im = cv2.resize(im, (224, 224), interpolation=cv2.INTER_NEAREST)
        # cv2.resize drops a trailing singleton channel axis; restore it
        if len(im.shape) == 2:
            im = im[..., np.newaxis]
        return im
    second_pass_keys = []
    for k in req_keys:
        if 'overlay' in k:
            # Run overlay in the second pass, skip for now
            second_pass_keys.append(k)
            # Make sure the base key information is transformed
            base_key = k.replace('_overlay', '')
            if base_key in req_keys:
                # Base key will be (or was) handled by its own iteration
                continue
            else:
                # Base key not requested for display: transform it now so
                # the overlay pass has data to work with
                k = base_key
        req_images[k] = []
        # In-place replacement: detach once, reuse below and in pass two
        images[k] = detach_to_cpu(images[k])
        for i in range(min(row_cnt, len(images[k]))):
            im = images[k][i]
            # Handles inverse transform for normalized inputs
            if k in ['im']:
                im = inv_im_trans(images[k][i])
            elif k in ['seg']:
                im = inv_seg_trans(images[k][i])
            # Now we are all numpy array
            im = base_transform(im)
            req_images[k].append(im)
    # Handle overlay images in the second pass
    for k in second_pass_keys:
        req_images[k] = []
        base_key = k.replace('_overlay', '')
        for i in range(min(row_cnt, len(images[base_key]))):
            # If overlay
            im = req_images[base_key][i]
            raw = req_images['im'][i]
            im = im.clip(0, 1)
            # Just red overlay: blend the raw image with a mask-weighted mix
            # of the raw image and pure red
            im = (raw*0.5 + 0.5 * (raw * (1-im)
                    + im * (np.array([1,0,0],dtype=np.float32)
                        .reshape([1,1,3]))))
            req_images[k].append(im)
    # Remove all temp items (e.g. bases computed only for an overlay)
    output_images = {}
    for k in req_keys:
        output_images[k] = req_images[k]
    return get_image_array(output_images, (224, 224), key_captions)
def vis_prediction(images):
    """Return a cv2 image, directly usable for saving."""
    # Other available keys: 'pred_28', 'pred_28_2', 'pred_56', 'pred_28_3', 'pred_56_2'
    wanted = ['im', 'seg', 'gt', 'pred_224', 'pred_224_overlay']
    return pool_images(images, wanted)