|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from abc import ABCMeta, abstractmethod |
|
|
|
|
|
import numpy as np |
|
|
import torch |
|
|
from mmengine.registry import Registry |
|
|
|
|
|
# Registry named 'camera'; the camera model classes defined in this file
# add themselves to it through the ``@CAMERAS.register_module()`` decorator.
CAMERAS = Registry('camera')
|
|
|
|
|
|
|
|
class SingleCameraBase(metaclass=ABCMeta):
    """Base class for single camera model.

    Subclasses must implement ``__init__`` (it is abstract). The three
    elementary transformations raise ``NotImplementedError`` until a
    subclass overrides them; ``world_to_pixel`` is provided here as the
    composition of ``world_to_camera`` and ``camera_to_pixel``.

    Args:
        param (dict): Camera parameters

    Methods:
        world_to_camera: Project points from world coordinates to camera
            coordinates
        camera_to_world: Project points from camera coordinates to world
            coordinates
        camera_to_pixel: Project points from camera coordinates to pixel
            coordinates
        world_to_pixel: Project points from world coordinates to pixel
            coordinates
    """

    @abstractmethod
    def __init__(self, param):
        """Load camera parameters and check validity."""

    def world_to_camera(self, X):
        """Project points from world coordinates to camera coordinates."""
        raise NotImplementedError

    def camera_to_world(self, X):
        """Project points from camera coordinates to world coordinates."""
        raise NotImplementedError

    def camera_to_pixel(self, X):
        """Project points from camera coordinates to pixel coordinates."""
        raise NotImplementedError

    def world_to_pixel(self, X):
        """Project points from world coordinates to pixel coordinates.

        Chains ``world_to_camera`` and ``camera_to_pixel``, so it works for
        any subclass that implements those two methods.
        """
        _X = self.world_to_camera(X)
        return self.camera_to_pixel(_X)
|
|
|
|
|
|
|
|
@CAMERAS.register_module()
class SimpleCamera(SingleCameraBase):
    """Camera model to calculate coordinate transformation with given
    intrinsic/extrinsic camera parameters.

    Note:
        The keypoint coordinate should be an np.ndarray with a shape of
        [...,J, C] where J is the keypoint number of an instance, and C is
        the coordinate dimension. For example:

            [J, C]: shape of joint coordinates of a person with J joints.
            [N, J, C]: shape of a batch of person joint coordinates.
            [N, T, J, C]: shape of a batch of pose sequences.

    Args:
        param (dict): camera parameters including:
            - R: 3x3, camera rotation matrix (camera-to-world)
            - T: 3x1, camera translation (camera-to-world)
            - K: (optional) 2x3, camera intrinsic matrix
            - k: (optional) nx1, camera radial distortion coefficients
            - p: (optional) mx1, camera tangential distortion coefficients
            - f: (optional) 2x1, camera focal length
            - c: (optional) 2x1, camera center
        if K is not provided, it will be calculated from f and c.

    Methods:
        world_to_camera: Project points from world coordinates to camera
            coordinates
        camera_to_world: Project points from camera coordinates to world
            coordinates
        camera_to_pixel: Project points from camera coordinates to pixel
            coordinates
        pixel_to_camera: Project points from pixel coordinates (with depth)
            to camera coordinates
        world_to_pixel: Project points from world coordinates to pixel
            coordinates
    """

    def __init__(self, param):
        """Parse and validate the camera parameters.

        Args:
            param (dict): see the class docstring.

        Raises:
            AssertionError: if a provided parameter has an unexpected shape
                or size.
            ValueError: if neither ``K`` nor both ``f`` and ``c`` are given.
        """
        self.param = {}

        # Extrinsics. Points are handled as row vectors (``X @ M``), so the
        # matrices are stored in the transposed form used for
        # right-multiplication.
        R = np.array(param['R'], dtype=np.float32)
        T = np.array(param['T'], dtype=np.float32)
        assert R.shape == (3, 3)
        assert T.shape == (3, 1)

        self.param['R_c2w'] = R.T
        self.param['T_c2w'] = T.T
        self.param['R_w2c'] = R
        # Inverse of ``X_w = X_c @ R_c2w + T_c2w``; this relies on R being
        # orthonormal so that R is the inverse of R.T.
        self.param['T_w2c'] = -self.param['T_c2w'] @ self.param['R_w2c']

        # Intrinsics: either K directly, or f and c from which K is built.
        if 'K' in param:
            K = np.array(param['K'], dtype=np.float32)
            assert K.shape == (2, 3)
            self.param['K'] = K.T
            self.param['f'] = np.array([K[0, 0], K[1, 1]])[:, np.newaxis]
            self.param['c'] = np.array([K[0, 2], K[1, 2]])[:, np.newaxis]
        elif 'f' in param and 'c' in param:
            f = np.array(param['f'], dtype=np.float32)
            c = np.array(param['c'], dtype=np.float32)
            assert f.shape == (2, 1)
            assert c.shape == (2, 1)
            self.param['K'] = np.concatenate((np.diagflat(f), c), axis=-1).T
            self.param['f'] = f
            self.param['c'] = c
        else:
            raise ValueError('Camera intrinsic parameters are missing. '
                             'Either "K" or "f"&"c" should be provided.')

        # Lens distortion is only modelled when both k and p are supplied.
        if 'k' in param and 'p' in param:
            self.undistortion = True
            self.param['k'] = np.array(param['k'], dtype=np.float32).flatten()
            self.param['p'] = np.array(param['p'], dtype=np.float32).flatten()
            assert self.param['k'].size in {3, 6}
            assert self.param['p'].size == 2
        else:
            self.undistortion = False

    def world_to_camera(self, X):
        """Transform [..., 3] world coordinates into camera coordinates."""
        assert isinstance(X, np.ndarray)
        assert X.ndim >= 2 and X.shape[-1] == 3
        return X @ self.param['R_w2c'] + self.param['T_w2c']

    def camera_to_world(self, X):
        """Transform [..., 3] camera coordinates into world coordinates."""
        assert isinstance(X, np.ndarray)
        assert X.ndim >= 2 and X.shape[-1] == 3
        return X @ self.param['R_c2w'] + self.param['T_c2w']

    def camera_to_pixel(self, X):
        """Project [..., 3] camera coordinates to [..., 2] pixel coordinates.

        The points are divided by their third component, distorted with the
        ``k``/``p`` coefficients when those were provided, and finally
        multiplied by the stored intrinsic matrix.
        """
        assert isinstance(X, np.ndarray)
        assert X.ndim >= 2 and X.shape[-1] == 3

        # Division creates a new array, so the caller's X is never modified.
        _X = X / X[..., 2:]

        if self.undistortion:
            k = self.param['k']
            p = self.param['p']
            _X_2d = _X[..., :2]
            r2 = (_X_2d**2).sum(-1)
            radial = 1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[:3]))
            if k.size == 6:
                # Six coefficients: k[3:] form the denominator polynomial.
                radial = radial / (
                    1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[3:])))

            tangential = 2 * (p[1] * _X[..., 0] + p[0] * _X[..., 1])

            # ``r2[..., None] * p[::-1]`` is the per-point outer product of
            # r2 with (p[1], p[0]) for any number of leading dimensions.
            _X[..., :2] = (
                _X_2d * (radial + tangential)[..., None] +
                r2[..., None] * p[::-1])
        return _X @ self.param['K']

    def pixel_to_camera(self, X):
        """Back-project [..., 3] pixel coordinates with depth to camera
        coordinates.

        The first two components are mapped as ``(x - c) / f * z`` where
        ``z`` is the third component; the third component is copied
        unchanged. Distortion coefficients are not used here.
        """
        assert isinstance(X, np.ndarray)
        assert X.ndim >= 2 and X.shape[-1] == 3
        _X = X.copy()
        # Index the LAST axis with ``...`` so that batched inputs such as
        # [N, J, 3] are handled; ``X[:, :2]`` would slice the second axis.
        _X[..., :2] = ((X[..., :2] - self.param['c'].T) /
                       self.param['f'].T * X[..., 2:])
        return _X
|
|
|
|
|
|
|
|
@CAMERAS.register_module()
class SimpleCameraTorch(SingleCameraBase):
    """Camera model to calculate coordinate transformation with given
    intrinsic/extrinsic camera parameters.

    Notes:
        The keypoint coordinate should be a torch.Tensor with a shape of
        [...,J, C] where J is the keypoint number of an instance, and C is
        the coordinate dimension. For example:

            [J, C]: shape of joint coordinates of a person with J joints.
            [N, J, C]: shape of a batch of person joint coordinates.
            [N, T, J, C]: shape of a batch of pose sequences.

    Args:
        param (dict): camera parameters including:
            - R: 3x3, camera rotation matrix (camera-to-world)
            - T: 3x1, camera translation (camera-to-world)
            - K: (optional) 2x3, camera intrinsic matrix
            - k: (optional) nx1, camera radial distortion coefficients
            - p: (optional) mx1, camera tangential distortion coefficients
            - f: (optional) 2x1, camera focal length
            - c: (optional) 2x1, camera center
        if K is not provided, it will be calculated from f and c.
        device: device on which the parameter tensors are created; it is
            passed to ``torch.tensor(..., device=device)``.

    Methods:
        world_to_camera: Project points from world coordinates to camera
            coordinates
        camera_to_world: Project points from camera coordinates to world
            coordinates
        camera_to_pixel: Project points from camera coordinates to pixel
            coordinates
        world_to_pixel: Project points from world coordinates to pixel
            coordinates
    """

    def __init__(self, param, device):
        """Parse and validate the camera parameters as tensors on ``device``.

        Raises:
            AssertionError: if a provided parameter has an unexpected shape
                or size.
            ValueError: if neither ``K`` nor both ``f`` and ``c`` are given.
        """
        self.param = {}

        # Extrinsics. Points are handled as row vectors (``X @ M``), so the
        # matrices are stored in the transposed form used for
        # right-multiplication.
        R = torch.tensor(param['R'], device=device)
        T = torch.tensor(param['T'], device=device)

        assert R.shape == (3, 3)
        assert T.shape == (3, 1)

        self.param['R_c2w'] = R.T
        self.param['T_c2w'] = T.T
        self.param['R_w2c'] = R
        # Inverse of ``X_w = X_c @ R_c2w + T_c2w``; this relies on R being
        # orthonormal so that R is the inverse of R.T.
        self.param['T_w2c'] = -self.param['T_c2w'] @ self.param['R_w2c']

        # Intrinsics: either K directly, or f and c from which K is built.
        if 'K' in param:
            K = torch.tensor(param['K'], device=device)
            assert K.shape == (2, 3)
            self.param['K'] = K.T
            self.param['f'] = torch.tensor([[K[0, 0]], [K[1, 1]]],
                                           device=device)
            self.param['c'] = torch.tensor([[K[0, 2]], [K[1, 2]]],
                                           device=device)
        elif 'f' in param and 'c' in param:
            f = torch.tensor(param['f'], device=device)
            c = torch.tensor(param['c'], device=device)
            assert f.shape == (2, 1)
            assert c.shape == (2, 1)
            self.param['K'] = torch.cat([torch.diagflat(f), c], dim=-1).T
            self.param['f'] = f
            self.param['c'] = c
        else:
            raise ValueError('Camera intrinsic parameters are missing. '
                             'Either "K" or "f"&"c" should be provided.')

        # Lens distortion is only modelled when both k and p are supplied.
        if 'k' in param and 'p' in param:
            self.undistortion = True
            self.param['k'] = torch.tensor(param['k'], device=device).view(-1)
            self.param['p'] = torch.tensor(param['p'], device=device).view(-1)
            assert len(self.param['k']) in {3, 6}
            assert len(self.param['p']) == 2
        else:
            self.undistortion = False

    def world_to_camera(self, X):
        """Transform [..., 3] world coordinates into camera coordinates."""
        assert isinstance(X, torch.Tensor)
        assert X.ndim >= 2 and X.shape[-1] == 3
        return X @ self.param['R_w2c'] + self.param['T_w2c']

    def camera_to_world(self, X):
        """Transform [..., 3] camera coordinates into world coordinates."""
        assert isinstance(X, torch.Tensor)
        assert X.ndim >= 2 and X.shape[-1] == 3
        return X @ self.param['R_c2w'] + self.param['T_c2w']

    def camera_to_pixel(self, X):
        """Project [..., 3] camera coordinates to [..., 2] pixel coordinates.

        The points are divided by their third component, distorted with the
        ``k``/``p`` coefficients when those were provided, and finally
        multiplied by the stored intrinsic matrix.
        """
        assert isinstance(X, torch.Tensor)
        assert X.ndim >= 2 and X.shape[-1] == 3

        _X = X / X[..., 2:]

        if self.undistortion:
            k = self.param['k']
            p = self.param['p']
            _X_2d = _X[..., :2]
            r2 = (_X_2d**2).sum(-1)
            radial = 1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[:3]))
            # ``Tensor.size`` is a method, so ``k.size == 6`` is never true;
            # ``numel()`` gives the actual number of coefficients.
            if k.numel() == 6:
                # Six coefficients: k[3:] form the denominator polynomial.
                radial = radial / (
                    1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[3:])))

            tangential = 2 * (p[1] * _X[..., 0] + p[0] * _X[..., 1])

            # Broadcasting replaces ``torch.ger``, which only accepts 1-D
            # vectors and therefore cannot handle batched inputs.
            distorted = (
                _X_2d * (radial + tangential)[..., None] +
                r2[..., None] * p.flip([0]))
            # Assemble the result out-of-place: writing into ``_X`` would
            # modify a tensor that earlier operations used as input, which
            # is expected to break gradient computation.
            _X = torch.cat([distorted, _X[..., 2:]], dim=-1)
        return _X @ self.param['K']
|
|
|