Spaces:

bulatko
/

zoo3d

Paused

App Files Files Community

zoo3d / MaskClustering /third_party /Entity /High-Quality-Segmention /util /image_saver_crm.py

drozdgk

chore: vendor third_party (remove submodules, ignore artifacts)

352cafd 8 days ago

raw

history blame contribute delete

4.91 kB

	import cv2
	import numpy as np

	import torchvision.transforms as transforms

	# Inverse normalisation for images. torchvision's Normalize computes
	# (x - mean) / std per channel, so mean=-m/s with std=1/s gives x*s + m,
	# undoing a forward Normalize(mean=m, std=s).
	inv_im_trans = transforms.Normalize(
	    mean=[-m / s for m, s in ((0.485, 0.229), (0.456, 0.224), (0.406, 0.225))],
	    std=[1 / s for s in (0.229, 0.224, 0.225)],
	)

	# Same construction for the single-channel case with m = s = 0.5.
	inv_seg_trans = transforms.Normalize(mean=[-0.5 / 0.5], std=[1 / 0.5])

	def tensor_to_numpy(image):
	    """Return ``image`` as a uint8 NumPy array after scaling it by 255.

	    ``image`` must expose ``.numpy()``. The cast truncates the fractional
	    part, it does not round.
	    """
	    scaled = image.numpy() * 255
	    return scaled.astype(np.uint8)

	def tensor_to_np_float(image):
	    """Return ``image`` (anything exposing ``.numpy()``) as a float32 array."""
	    as_array = image.numpy()
	    return as_array.astype(np.float32)

	def detach_to_cpu(x):
	    """Return ``x.detach().cpu()``: ``x`` detached, then moved to the CPU."""
	    detached = x.detach()
	    return detached.cpu()

	def transpose_np(x):
	    """Move axis 0 of a 3-D array to the end (axes reordered to 1, 2, 0)."""
	    channels_last = np.transpose(x, axes=(1, 2, 0))
	    return channels_last

	def tensor_to_gray_im(x):
	    """Convert a tensor to a uint8 array with axis 0 moved to the end.

	    Steps: detach and move to CPU, scale by 255 and cast to uint8, then
	    reorder the axes to (1, 2, 0). No inverse normalisation is applied.
	    """
	    on_cpu = detach_to_cpu(x)
	    return transpose_np(tensor_to_numpy(on_cpu))

	def tensor_to_seg(x):
	    """Convert a normalised segmentation tensor to a uint8 array.

	    Steps: detach and move to CPU, undo the normalisation with
	    ``inv_seg_trans``, scale by 255 and cast to uint8, then reorder the
	    axes to (1, 2, 0).
	    """
	    restored = inv_seg_trans(detach_to_cpu(x))
	    return transpose_np(tensor_to_numpy(restored))

	def tensor_to_im(x):
	    """Convert a normalised image tensor to a uint8 array.

	    Steps: detach and move to CPU, undo the normalisation with
	    ``inv_im_trans``, scale by 255 and cast to uint8, then reorder the
	    axes to (1, 2, 0).
	    """
	    restored = inv_im_trans(detach_to_cpu(x))
	    return transpose_np(tensor_to_numpy(restored))

	# Caption shown above each column of the visualisation grid, looked up
	# by image key.
	key_captions = {
	    'im': 'Image',
	    'gt': 'GT',
	    'seg': 'Input',
	    'error_map': 'Error map',
	}
	# Every prediction size gets a plain entry and an '_overlay' entry.
	for k in ('28', '56', '224'):
	    key_captions.update({
	        'pred_' + k: 'Ours-%sx%s' % (k, k),
	        'pred_' + k + '_overlay': '%sx%s' % (k, k),
	    })

	def get_image_array(images, grid_shape, captions=None):
	    """Tile columns of images into one captioned uint8 canvas.

	    Every key of ``images`` becomes one column: a caption cell on top,
	    followed by that key's images stacked downwards, one cell per image.

	    Args:
	        images: dict mapping a key to a list of arrays. Each array is
	            multiplied by 255 and cast to uint8, so float values in
	            [0, 1] are expected. 2-D arrays get a trailing channel axis.
	            Each array must fit (or broadcast to) an ``(h, w, 3)`` cell.
	        grid_shape: ``(w, h)`` size of one cell in pixels.
	        captions: optional dict mapping keys to caption text. A key
	            without an entry is captioned with the key itself. A newline
	            character in a caption starts a new text line.

	    Returns:
	        ``np.uint8`` array of shape ``(h * (rows + 1), w * columns, 3)``,
	        where ``rows`` is the length of the longest image list. An empty
	        ``images`` dict yields an ``(h, 0, 3)`` array.
	    """
	    if captions is None:
	        captions = {}

	    w, h = grid_shape
	    cate_counts = len(images)
	    # Size the canvas for the longest column so that a shorter first
	    # column cannot make later columns run past the bottom edge.
	    rows_counts = max((len(v) for v in images.values()), default=0)

	    # One extra row at the top holds the captions.
	    output_image = np.zeros([h * (rows_counts + 1), w * cate_counts, 3],
	                            dtype=np.uint8)

	    line_height = 40
	    for col_cnt, (k, v) in enumerate(images.items()):
	        left = col_cnt * w

	        # Default as key value itself
	        caption_lines = captions.get(k, k).split('\n')

	        # Place the text lines 40 px apart, positioned from the bottom
	        # of the caption cell.
	        y0 = h - 10 - len(caption_lines) * line_height
	        for i, line in enumerate(caption_lines):
	            cv2.putText(output_image, line, (left, y0 + i * line_height),
	                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2,
	                        cv2.LINE_AA)

	        # Put images
	        for row_cnt, img in enumerate(v):
	            if len(img.shape) == 2:
	                # Add a channel axis so the cell assignment broadcasts.
	                img = img[..., np.newaxis]

	            img = (img * 255).astype('uint8')

	            top = (row_cnt + 1) * h
	            output_image[top:top + h, left:left + w, :] = img

	    return output_image

	def pool_images(images, req_keys, row_cnt=10):
	    """Build the captioned visualisation grid for the requested keys.

	    Each requested key becomes one column of 224x224 cells. A key
	    containing ``'overlay'`` is not read from ``images``. It is built by
	    blending the key without ``'_overlay'`` over the ``'im'`` column.

	    Args:
	        images: dict mapping a key to a batch of tensors. Each batch must
	            support ``len`` and integer indexing, and each item must have
	            three axes, because it is transposed with ``(1, 2, 0)``.
	            The dict is not modified.
	        req_keys: keys to show, in column order.
	        row_cnt: maximum number of items taken from each batch.

	    Returns:
	        The uint8 array produced by ``get_image_array`` with
	        ``key_captions`` as captions.
	    """
	    side = 224  # edge length of every grid cell, in pixels
	    # Overlay colour: full intensity in the first channel only.
	    red = np.array([1, 0, 0], dtype=np.float32).reshape([1, 1, 3])

	    def base_transform(im):
	        im = tensor_to_np_float(im)
	        im = im.transpose((1, 2, 0))

	        # Resize when either spatial dimension differs from the cell size.
	        if im.shape[:2] != (side, side):
	            im = cv2.resize(im, (side, side), interpolation=cv2.INTER_NEAREST)

	        # The resize may hand back a 2-D array for single-channel input.
	        # Restore the channel axis so later broadcasting works.
	        if len(im.shape) == 2:
	            im = im[..., np.newaxis]

	        return im

	    req_images = {}

	    def load_key(key):
	        """Transform ``images[key]`` into ``req_images[key]`` once."""
	        if key in req_images:
	            return
	        # Keep the CPU copy local rather than writing it back into the
	        # caller's dict.
	        batch = detach_to_cpu(images[key])
	        rows = []
	        for i in range(min(row_cnt, len(batch))):
	            im = batch[i]

	            # Handles inverse transform
	            if key == 'im':
	                im = inv_im_trans(im)
	            elif key == 'seg':
	                im = inv_seg_trans(im)

	            # Now we are all numpy array
	            rows.append(base_transform(im))
	        req_images[key] = rows

	    second_pass_keys = []
	    for k in req_keys:
	        if 'overlay' in k:
	            # Run overlay in the second pass. Only make sure the base key
	            # it is derived from has been transformed.
	            second_pass_keys.append(k)
	            load_key(k.replace('_overlay', ''))
	        else:
	            load_key(k)

	    if second_pass_keys:
	        # Overlays are blended over the raw image, so it has to be
	        # available even when 'im' itself was not requested.
	        load_key('im')

	    # Handle overlay images in the second pass
	    for k in second_pass_keys:
	        base_key = k.replace('_overlay', '')
	        overlays = []
	        for im, raw in zip(req_images[base_key], req_images['im']):
	            im = im.clip(0, 1)

	            # Half the raw image plus half of (raw where the mask is 0,
	            # the overlay colour where the mask is 1).
	            overlays.append(raw * 0.5 + 0.5 * (raw * (1 - im) + im * red))
	        req_images[k] = overlays

	    # Keep only the requested keys, in the requested order.
	    output_images = {k: req_images[k] for k in req_keys}

	    return get_image_array(output_images, (side, side), key_captions)

	def vis_prediction(images):
	    """Return the prediction grid as a cv2 image, directly usable for saving.

	    Delegates to ``pool_images`` with a fixed list of column keys.
	    """
	    # Further columns, currently switched off:
	    # 'pred_28', 'pred_28_2', 'pred_56', 'pred_28_3', 'pred_56_2'
	    shown_keys = ['im', 'seg', 'gt', 'pred_224', 'pred_224_overlay']
	    return pool_images(images, shown_keys)