Spaces:

VAST-AI
/

AniGen

Running on Zero

App Files Files Community

AniGen / anigen /utils /general_utils.py

Yihua7

Initial commit: AniGen - Animatable 3D Generation

6b92ff7 3 days ago

raw

history blame contribute delete

8.88 kB

	import re
	import numpy as np
	import cv2
	import torch
	import contextlib


	# Dictionary utils
	def _dict_merge(dicta, dictb, prefix=''):
	    """
	    Recursively merge two dictionaries into a new dictionary.

	    Keys that exist in only one input are carried over unchanged. Keys that
	    exist in both inputs are merged recursively when both values are
	    dictionaries; any other collision is reported as an error.

	    Args:
	        dicta: First dictionary.
	        dictb: Second dictionary.
	        prefix: Dotted path of the enclosing keys. Only used to build the
	            key path shown in the error message.

	    Returns:
	        A new dictionary. The inputs are not modified; values that are not
	        merged are the same objects as in the inputs (no copy is made).
	        Keys appear in first-seen order: those of ``dicta`` first, then the
	        keys that only ``dictb`` has.

	    Raises:
	        AssertionError: If either input is not a dictionary.
	        ValueError: If a key is present in both inputs and the two values
	            are not both dictionaries.
	    """
	    assert isinstance(dicta, dict), 'input must be a dictionary'
	    assert isinstance(dictb, dict), 'input must be a dictionary'
	    merged = {}
	    # Ordered union of keys so the result does not depend on set/hash order.
	    ordered_keys = list(dicta) + [key for key in dictb if key not in dicta]
	    for key in ordered_keys:
	        # Join with '.' only below the top level, otherwise the reported
	        # path would start with a stray dot (".key").
	        path = f'{prefix}.{key}' if prefix else f'{key}'
	        if key in dicta and key in dictb:
	            if isinstance(dicta[key], dict) and isinstance(dictb[key], dict):
	                merged[key] = _dict_merge(dicta[key], dictb[key], prefix=path)
	            else:
	                raise ValueError(f'Duplicate key {path} found in both dictionaries. Types: {type(dicta[key])}, {type(dictb[key])}')
	        elif key in dicta:
	            merged[key] = dicta[key]
	        else:
	            merged[key] = dictb[key]
	    return merged


	def dict_merge(dicta, dictb):
	    """
	    Combine two (possibly nested) dictionaries into one.

	    Public entry point: delegates to ``_dict_merge`` starting from an empty
	    key prefix and returns whatever that helper produces.
	    """
	    merged = _dict_merge(dicta, dictb, prefix='')
	    return merged


	def dict_foreach(dic, func, special_func=None):
	    """
	    Recursively apply a function to all non-dictionary leaf values in a dictionary.

	    The dictionary is updated in place: every leaf value is replaced by the
	    result of the function applied to it, and nested dictionaries are
	    processed the same way.

	    Args:
	        dic: Dictionary to transform (modified in place).
	        func: Callable applied to each leaf value that has no special handler.
	        special_func: Optional mapping ``key -> callable``. A leaf stored
	            under one of these keys is transformed by the mapped callable
	            instead of ``func``. The mapping is honoured at every nesting
	            level.

	    Returns:
	        The same ``dic`` object, after modification.

	    Raises:
	        AssertionError: If ``dic`` is not a dictionary.
	    """
	    assert isinstance(dic, dict), 'input must be a dictionary'
	    if special_func is None:
	        special_func = {}
	    for key in dic.keys():
	        value = dic[key]
	        if isinstance(value, dict):
	            # Forward special_func so nested leaves get the same per-key
	            # handling as top-level ones (matches dict_reduce).
	            dic[key] = dict_foreach(value, func, special_func)
	        elif key in special_func:
	            dic[key] = special_func[key](value)
	        else:
	            dic[key] = func(value)
	    return dic


	def dict_reduce(dicts, func, special_func=None):
	    """
	    Reduce a list of dictionaries into a single dictionary.

	    For every key found in any of the inputs, the values stored under that
	    key are gathered into a list (inputs lacking the key are skipped). If
	    the first gathered value is a dictionary, the list is reduced
	    recursively; otherwise the list is passed to a reducer.

	    Args:
	        dicts: Non-empty list of dictionaries.
	        func: Callable taking the list of values for one key and returning
	            the reduced value.
	        special_func: Optional mapping ``key -> callable`` used instead of
	            ``func`` for leaves stored under those keys, at any depth.

	    Returns:
	        A new dictionary of reduced values. Keys appear in the order in
	        which they are first seen while scanning ``dicts``.

	    Raises:
	        AssertionError: If ``dicts`` is not a non-empty list of dictionaries
	            (also raised from the recursive call when a key holds a
	            dictionary in one input and a non-dictionary in another).
	    """
	    assert isinstance(dicts, list), 'input must be a list of dictionaries'
	    assert all([isinstance(d, dict) for d in dicts]), 'input must be a list of dictionaries'
	    assert len(dicts) > 0, 'input must be a non-empty list of dictionaries'
	    if special_func is None:
	        special_func = {}
	    # dict.fromkeys de-duplicates while keeping first-seen order, so the
	    # output layout is deterministic (a set would not guarantee that).
	    all_keys = dict.fromkeys(key for dict_ in dicts for key in dict_)
	    reduced_dict = {}
	    for key in all_keys:
	        vlist = [dict_[key] for dict_ in dicts if key in dict_]
	        if isinstance(vlist[0], dict):
	            reduced_dict[key] = dict_reduce(vlist, func, special_func)
	        elif key in special_func:
	            reduced_dict[key] = special_func[key](vlist)
	        else:
	            reduced_dict[key] = func(vlist)
	    return reduced_dict


	def dict_any(dic, func):
	    """
	    Tell whether a predicate holds for at least one leaf of a nested dictionary.

	    Values that are dictionaries are searched recursively; every other value
	    is passed to ``func``. Evaluation stops at the first truthy result.
	    Returns ``True`` on a hit and ``False`` otherwise (including for an
	    empty dictionary).
	    """
	    assert isinstance(dic, dict), 'input must be a dictionary'
	    return any(
	        dict_any(value, func) if isinstance(value, dict) else func(value)
	        for value in dic.values()
	    )


	def dict_all(dic, func):
	    """
	    Tell whether a predicate holds for every leaf of a nested dictionary.

	    Values that are dictionaries are checked recursively; every other value
	    is passed to ``func``. Evaluation stops at the first falsy result.
	    Returns ``False`` on a miss and ``True`` otherwise (including for an
	    empty dictionary).
	    """
	    assert isinstance(dic, dict), 'input must be a dictionary'
	    return all(
	        dict_all(value, func) if isinstance(value, dict) else func(value)
	        for value in dic.values()
	    )


	def _keep_largest_connected_component_3d(mask_3d: "np.ndarray") -> "np.ndarray":
	    """
	    Reduce a 3D mask to its single biggest connected region.

	    Two voxels belong to the same region when they share a face
	    (6-neighborhood); diagonal contact does not connect them.

	    ``None`` is passed straight through. Any other input is converted to a
	    boolean array; if it has no set voxel, or only one region, that boolean
	    array is returned as is. When several regions have the same maximal
	    size, the one with the lowest label (SciPy route) or the one discovered
	    first (fallback route) is kept.

	    Raises:
	        ValueError: If the converted input is not 3-dimensional.
	    """
	    if mask_3d is None:
	        return mask_3d
	    mask_3d = np.asarray(mask_3d)
	    mask_3d = mask_3d.astype(bool)
	    if mask_3d.ndim != 3:
	        raise ValueError(f"mask_3d must be 3D, got shape={mask_3d.shape}")
	    if not mask_3d.any():
	        return mask_3d

	    # Preferred route: SciPy labelling. Any failure here (including SciPy
	    # not being importable) silently drops through to the pure-Python route.
	    try:
	        import scipy.ndimage as ndi  # type: ignore

	        # Connectivity kernel: the centre voxel plus its six face neighbours,
	        # i.e. the three axis-aligned lines through the centre.
	        face_kernel = np.zeros((3, 3, 3), dtype=np.int8)
	        face_kernel[:, 1, 1] = 1
	        face_kernel[1, :, 1] = 1
	        face_kernel[1, 1, :] = 1

	        component_ids, n_components = ndi.label(mask_3d, structure=face_kernel)
	        if n_components <= 1:
	            return mask_3d
	        voxels_per_id = np.bincount(component_ids.ravel())
	        voxels_per_id[0] = 0  # id 0 is the background, never a candidate
	        return component_ids == int(np.argmax(voxels_per_id))
	    except Exception:
	        pass

	    # Fallback route: breadth-first flood fill over face neighbours.
	    from collections import deque

	    depth, height, width = mask_3d.shape
	    component_of = np.full((depth, height, width), -1, dtype=np.int32)  # -1 = unvisited
	    offsets = ((-1, 0, 0), (1, 0, 0), (0, -1, 0), (0, 1, 0), (0, 0, -1), (0, 0, 1))

	    next_id = 0
	    winner_id = -1
	    winner_size = 0

	    for z0, y0, x0 in np.argwhere(mask_3d):
	        if component_of[z0, y0, x0] != -1:
	            continue  # already swallowed by an earlier flood fill
	        component_of[z0, y0, x0] = next_id
	        frontier = deque([(int(z0), int(y0), int(x0))])
	        voxel_count = 0
	        while frontier:
	            z, y, x = frontier.popleft()
	            voxel_count += 1
	            for dz, dy, dx in offsets:
	                nz, ny, nx = z + dz, y + dy, x + dx
	                if not (0 <= nz < depth and 0 <= ny < height and 0 <= nx < width):
	                    continue
	                if mask_3d[nz, ny, nx] and component_of[nz, ny, nx] == -1:
	                    component_of[nz, ny, nx] = next_id
	                    frontier.append((nz, ny, nx))
	        # Strict '>' keeps the earliest region on ties.
	        if voxel_count > winner_size:
	            winner_size, winner_id = voxel_count, next_id
	        next_id += 1

	    if winner_id < 0:
	        return mask_3d
	    return component_of == winner_id


	def _final_joint_count(joints) -> int:
	    """
	    Work out how many joints a joint container holds, based on its shape.

	    ``joints`` may be ``None`` (count 0), a torch tensor, or anything
	    ``np.asarray`` accepts (numpy array, nested list, ...).

	    Shape rules, applied in order:
	      * 3 dims with a leading dim of 1 (``[1, J, C]``) -> ``J``
	      * 2 or more dims (``[J, C]``, ...)               -> size of dim 0
	      * 1 dim                                          -> its length
	      * 0 dims (scalar)                                -> 1
	    """
	    if joints is None:
	        return 0

	    # Only the shape matters, so torch tensors are never converted to numpy.
	    if isinstance(joints, torch.Tensor):
	        dims = tuple(joints.shape)
	    else:
	        dims = np.asarray(joints).shape

	    rank = len(dims)
	    if rank == 3 and dims[0] == 1:
	        return int(dims[1])
	    if rank >= 2:
	        return int(dims[0])
	    # Element count of a 1-D input is its length; a 0-D input has one element.
	    return int(dims[0]) if rank == 1 else 1


	# Context utils
	@contextlib.contextmanager
	def nested_contexts(*contexts):
	    """
	    Enter several context managers as one.

	    Each argument is a zero-argument callable that returns a context
	    manager. The callables are invoked and entered left to right, and the
	    resulting managers are exited in reverse order when the block ends.
	    Nothing is bound by ``as`` (the yielded value is ``None``).
	    """
	    stack = contextlib.ExitStack()
	    with stack:
	        for make_context in contexts:
	            manager = make_context()
	            stack.enter_context(manager)
	        yield


	# Image utils
	def make_grid(images, nrow=None, ncol=None, aspect_ratio=None):
	    """
	    Tile a sequence of images into one grid image, filled row by row.

	    Args:
	        images: Non-empty sequence of numpy arrays, either all 2-D or all
	            3-D. The cell size, channel count and dtype are taken from
	            ``images[0]``; the other images are assumed to have the same
	            shape.
	        nrow: Number of grid rows. Derived from ``ncol`` when omitted.
	        ncol: Number of grid columns. Derived from ``nrow`` when omitted.
	        aspect_ratio: Only used when both ``nrow`` and ``ncol`` are omitted:
	            desired columns-to-rows ratio of the layout. Without it, the
	            layout is as close to square as possible.

	    Returns:
	        A new array holding the grid. Cells without an image stay zero.

	    Raises:
	        ValueError: If ``images`` is empty.
	        AssertionError: If both ``nrow`` and ``ncol`` are given but the grid
	            is too small for all images.
	    """
	    num_images = len(images)
	    if num_images == 0:
	        raise ValueError('images must contain at least one image')

	    if nrow is None and ncol is None:
	        if aspect_ratio is not None:
	            nrow = int(np.round(np.sqrt(num_images / aspect_ratio)))
	        else:
	            nrow = int(np.sqrt(num_images))
	        # Rounding can yield 0 rows (e.g. one image with a wide aspect
	        # ratio), which would divide by zero below.
	        nrow = max(nrow, 1)
	        ncol = (num_images + nrow - 1) // nrow
	    elif nrow is None:
	        nrow = (num_images + ncol - 1) // ncol
	    elif ncol is None:
	        ncol = (num_images + nrow - 1) // nrow
	    else:
	        assert nrow * ncol >= num_images, 'nrow * ncol must be greater than or equal to the number of images'

	    first = images[0]
	    cell_h, cell_w = first.shape[0], first.shape[1]
	    if first.ndim == 2:
	        grid_shape = (nrow * cell_h, ncol * cell_w)
	    else:
	        grid_shape = (nrow * cell_h, ncol * cell_w, first.shape[2])
	    grid = np.zeros(grid_shape, dtype=first.dtype)

	    for i, img in enumerate(images):
	        row, col = divmod(i, ncol)
	        grid[row * img.shape[0]:(row + 1) * img.shape[0], col * img.shape[1]:(col + 1) * img.shape[1]] = img
	    return grid


	def notes_on_image(img, notes=None):
	    """
	    Extend an image with a 32-pixel zero-filled strip at the bottom and,
	    when ``notes`` is given, draw that text in white inside the strip.

	    The padding spec has three axes, so ``img`` must be a 3-D array
	    (presumably height x width x channels in RGB order - confirm with
	    callers). Drawing happens on a BGR copy which is converted back before
	    being returned, so the result uses the same channel order as the input.
	    """
	    strip_height = 32
	    padded = np.pad(img, ((0, strip_height), (0, 0), (0, 0)), 'constant', constant_values=0)
	    canvas = cv2.cvtColor(padded, cv2.COLOR_RGB2BGR)
	    if notes is not None:
	        # Text origin is its bottom-left corner: left edge, 4 px above the bottom.
	        text_origin = (0, canvas.shape[0] - 4)
	        canvas = cv2.putText(canvas, notes, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1)
	    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)


	def save_image_with_notes(img, path, notes=None):
	    """
	    Save an image with notes.

	    Args:
	        img: Image to save. A torch tensor is moved to the CPU, converted to
	            numpy and its axes reordered with ``transpose(1, 2, 0)``
	            (presumably channels-first to channels-last - confirm with
	            callers). Floating-point data is multiplied by 255, clipped to
	            [0, 255] and cast to ``uint8``; other dtypes are used as is.
	        path: Destination file path handed to ``cv2.imwrite``.
	        notes: Optional text forwarded to ``notes_on_image``.
	    """
	    if isinstance(img, torch.Tensor):
	        # detach() first: Tensor.numpy() refuses tensors that require grad.
	        img = img.detach().cpu().numpy().transpose(1, 2, 0)
	    # Any floating dtype (float16 included) is rescaled, not just float32/64.
	    if np.issubdtype(img.dtype, np.floating):
	        img = np.clip(img * 255, 0, 255).astype(np.uint8)
	    img = notes_on_image(img, notes)
	    # cv2.imwrite expects BGR channel order.
	    cv2.imwrite(path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))


	# Debug utils

	def atol(x, y):
	    """
	    Element-wise absolute difference ``|x - y|``, for checking results
	    against an absolute tolerance.
	    """
	    difference = x - y
	    return torch.abs(difference)


	def rtol(x, y):
	    """
	    Element-wise relative difference: ``|x - y|`` divided by the larger of
	    ``|x|`` and ``|y|``, for checking results against a relative tolerance.

	    The denominator is floored at 1e-12 so that positions where both inputs
	    are zero give 0 instead of a division by zero.
	    """
	    gap = torch.abs(x - y)
	    magnitude = torch.maximum(torch.abs(x), torch.abs(y))
	    return gap / torch.clamp_min(magnitude, 1e-12)


	# Print utils
	def indent(s, n=4):
	    """
	    Shift every line of a string except the first one to the right by
	    ``n`` spaces.

	    The first line is left alone so the result can be dropped in after an
	    existing prefix on the same line.
	    """
	    padding = ' ' * n
	    # Inserting the padding right after each newline indents lines 2..N.
	    return s.replace('\n', '\n' + padding)