idol-models / submodule /sapiens /pose /mmpose /utils /camera.py

Upload folder using huggingface_hub

789eef1 verified 6 months ago

10.8 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	from abc import ABCMeta, abstractmethod

	import numpy as np
	import torch
	from mmengine.registry import Registry

	CAMERAS = Registry('camera')


	class SingleCameraBase(metaclass=ABCMeta):
	"""Base class for single camera model.

	Args:
	param (dict): Camera parameters

	Methods:
	world_to_camera: Project points from world coordinates to camera
	coordinates
	camera_to_world: Project points from camera coordinates to world
	coordinates
	camera_to_pixel: Project points from camera coordinates to pixel
	coordinates
	world_to_pixel: Project points from world coordinates to pixel
	coordinates
	"""

	@abstractmethod
	def __init__(self, param):
	"""Load camera parameters and check validity."""

	def world_to_camera(self, X):
	"""Project points from world coordinates to camera coordinates."""
	raise NotImplementedError

	def camera_to_world(self, X):
	"""Project points from camera coordinates to world coordinates."""
	raise NotImplementedError

	def camera_to_pixel(self, X):
	"""Project points from camera coordinates to pixel coordinates."""
	raise NotImplementedError

	def world_to_pixel(self, X):
	"""Project points from world coordinates to pixel coordinates."""
	_X = self.world_to_camera(X)
	return self.camera_to_pixel(_X)


	@CAMERAS.register_module()
	class SimpleCamera(SingleCameraBase):
	"""Camera model to calculate coordinate transformation with given
	intrinsic/extrinsic camera parameters.

	Note:
	The keypoint coordinate should be an np.ndarray with a shape of
	[...,J, C] where J is the keypoint number of an instance, and C is
	the coordinate dimension. For example:

	[J, C]: shape of joint coordinates of a person with J joints.
	[N, J, C]: shape of a batch of person joint coordinates.
	[N, T, J, C]: shape of a batch of pose sequences.

	Args:
	param (dict): camera parameters including:
	- R: 3x3, camera rotation matrix (camera-to-world)
	- T: 3x1, camera translation (camera-to-world)
	- K: (optional) 2x3, camera intrinsic matrix
	- k: (optional) nx1, camera radial distortion coefficients
	- p: (optional) mx1, camera tangential distortion coefficients
	- f: (optional) 2x1, camera focal length
	- c: (optional) 2x1, camera center
	if K is not provided, it will be calculated from f and c.

	Methods:
	world_to_camera: Project points from world coordinates to camera
	coordinates
	camera_to_pixel: Project points from camera coordinates to pixel
	coordinates
	world_to_pixel: Project points from world coordinates to pixel
	coordinates
	"""

	def __init__(self, param):

	self.param = {}
	# extrinsic param
	R = np.array(param['R'], dtype=np.float32)
	T = np.array(param['T'], dtype=np.float32)
	assert R.shape == (3, 3)
	assert T.shape == (3, 1)
	# The camera matrices are transposed in advance because the joint
	# coordinates are stored as row vectors.
	self.param['R_c2w'] = R.T
	self.param['T_c2w'] = T.T
	self.param['R_w2c'] = R
	self.param['T_w2c'] = -self.param['T_c2w'] @ self.param['R_w2c']

	# intrinsic param
	if 'K' in param:
	K = np.array(param['K'], dtype=np.float32)
	assert K.shape == (2, 3)
	self.param['K'] = K.T
	self.param['f'] = np.array([K[0, 0], K[1, 1]])[:, np.newaxis]
	self.param['c'] = np.array([K[0, 2], K[1, 2]])[:, np.newaxis]
	elif 'f' in param and 'c' in param:
	f = np.array(param['f'], dtype=np.float32)
	c = np.array(param['c'], dtype=np.float32)
	assert f.shape == (2, 1)
	assert c.shape == (2, 1)
	self.param['K'] = np.concatenate((np.diagflat(f), c), axis=-1).T
	self.param['f'] = f
	self.param['c'] = c
	else:
	raise ValueError('Camera intrinsic parameters are missing. '
	'Either "K" or "f"&"c" should be provided.')

	# distortion param
	if 'k' in param and 'p' in param:
	self.undistortion = True
	self.param['k'] = np.array(param['k'], dtype=np.float32).flatten()
	self.param['p'] = np.array(param['p'], dtype=np.float32).flatten()
	assert self.param['k'].size in {3, 6}
	assert self.param['p'].size == 2
	else:
	self.undistortion = False

	def world_to_camera(self, X):
	assert isinstance(X, np.ndarray)
	assert X.ndim >= 2 and X.shape[-1] == 3
	return X @ self.param['R_w2c'] + self.param['T_w2c']

	def camera_to_world(self, X):
	assert isinstance(X, np.ndarray)
	assert X.ndim >= 2 and X.shape[-1] == 3
	return X @ self.param['R_c2w'] + self.param['T_c2w']

	def camera_to_pixel(self, X):
	assert isinstance(X, np.ndarray)
	assert X.ndim >= 2 and X.shape[-1] == 3

	_X = X / X[..., 2:]

	if self.undistortion:
	k = self.param['k']
	p = self.param['p']
	_X_2d = _X[..., :2]
	r2 = (_X_2d**2).sum(-1)
	radial = 1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[:3]))
	if k.size == 6:
	radial /= 1 + sum(
	(ki * r2**(i + 1) for i, ki in enumerate(k[3:])))

	tangential = 2 * (p[1] * _X[..., 0] + p[0] * _X[..., 1])

	_X[..., :2] = _X_2d * (radial + tangential)[..., None] + np.outer(
	r2, p[::-1]).reshape(_X_2d.shape)
	return _X @ self.param['K']

	def pixel_to_camera(self, X):
	assert isinstance(X, np.ndarray)
	assert X.ndim >= 2 and X.shape[-1] == 3
	_X = X.copy()
	_X[:, :2] = (X[:, :2] - self.param['c'].T) / self.param['f'].T * X[:,
	[2]]
	return _X


	@CAMERAS.register_module()
	class SimpleCameraTorch(SingleCameraBase):
	"""Camera model to calculate coordinate transformation with given
	intrinsic/extrinsic camera parameters.

	Notes:
	The keypoint coordinate should be an np.ndarray with a shape of
	[...,J, C] where J is the keypoint number of an instance, and C is
	the coordinate dimension. For example:

	[J, C]: shape of joint coordinates of a person with J joints.
	[N, J, C]: shape of a batch of person joint coordinates.
	[N, T, J, C]: shape of a batch of pose sequences.

	Args:
	param (dict): camera parameters including:
	- R: 3x3, camera rotation matrix (camera-to-world)
	- T: 3x1, camera translation (camera-to-world)
	- K: (optional) 2x3, camera intrinsic matrix
	- k: (optional) nx1, camera radial distortion coefficients
	- p: (optional) mx1, camera tangential distortion coefficients
	- f: (optional) 2x1, camera focal length
	- c: (optional) 2x1, camera center
	if K is not provided, it will be calculated from f and c.

	Methods:
	world_to_camera: Project points from world coordinates to camera
	coordinates
	camera_to_pixel: Project points from camera coordinates to pixel
	coordinates
	world_to_pixel: Project points from world coordinates to pixel
	coordinates
	"""

	def __init__(self, param, device):

	self.param = {}
	# extrinsic param
	R = torch.tensor(param['R'], device=device)
	T = torch.tensor(param['T'], device=device)

	assert R.shape == (3, 3)
	assert T.shape == (3, 1)
	# The camera matrices are transposed in advance because the joint
	# coordinates are stored as row vectors.
	self.param['R_c2w'] = R.T
	self.param['T_c2w'] = T.T
	self.param['R_w2c'] = R
	self.param['T_w2c'] = -self.param['T_c2w'] @ self.param['R_w2c']

	# intrinsic param
	if 'K' in param:
	K = torch.tensor(param['K'], device=device)
	assert K.shape == (2, 3)
	self.param['K'] = K.T
	self.param['f'] = torch.tensor([[K[0, 0]], [K[1, 1]]],
	device=device)
	self.param['c'] = torch.tensor([[K[0, 2]], [K[1, 2]]],
	device=device)
	elif 'f' in param and 'c' in param:
	f = torch.tensor(param['f'], device=device)
	c = torch.tensor(param['c'], device=device)
	assert f.shape == (2, 1)
	assert c.shape == (2, 1)
	self.param['K'] = torch.cat([torch.diagflat(f), c], dim=-1).T
	self.param['f'] = f
	self.param['c'] = c
	else:
	raise ValueError('Camera intrinsic parameters are missing. '
	'Either "K" or "f"&"c" should be provided.')

	# distortion param
	if 'k' in param and 'p' in param:
	self.undistortion = True
	self.param['k'] = torch.tensor(param['k'], device=device).view(-1)
	self.param['p'] = torch.tensor(param['p'], device=device).view(-1)
	assert len(self.param['k']) in {3, 6}
	assert len(self.param['p']) == 2
	else:
	self.undistortion = False

	def world_to_camera(self, X):
	assert isinstance(X, torch.Tensor)
	assert X.ndim >= 2 and X.shape[-1] == 3
	return X @ self.param['R_w2c'] + self.param['T_w2c']

	def camera_to_world(self, X):
	assert isinstance(X, torch.Tensor)
	assert X.ndim >= 2 and X.shape[-1] == 3
	return X @ self.param['R_c2w'] + self.param['T_c2w']

	def camera_to_pixel(self, X):
	assert isinstance(X, torch.Tensor)
	assert X.ndim >= 2 and X.shape[-1] == 3

	_X = X / X[..., 2:]

	if self.undistortion:
	k = self.param['k']
	p = self.param['p']
	_X_2d = _X[..., :2]
	r2 = (_X_2d**2).sum(-1)
	radial = 1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[:3]))
	if k.size == 6:
	radial /= 1 + sum(
	(ki * r2**(i + 1) for i, ki in enumerate(k[3:])))

	tangential = 2 * (p[1] * _X[..., 0] + p[0] * _X[..., 1])

	_X[..., :2] = _X_2d * (radial + tangential)[..., None] + torch.ger(
	r2, p.flip([0])).reshape(_X_2d.shape)
	return _X @ self.param['K']