|
|
import imageio |
|
|
import numpy as np |
|
|
import torch |
|
|
from PIL import Image |
|
|
from scipy.interpolate import UnivariateSpline |
|
|
from scipy.interpolate import interp1d |
|
|
|
|
|
|
|
|
def load_video(video_path): |
|
|
reader = imageio.get_reader(video_path) |
|
|
total_frames = reader.count_frames() |
|
|
frames = [] |
|
|
for i in range(total_frames): |
|
|
frame = reader.get_data(i) |
|
|
frames.append(Image.fromarray(frame)) |
|
|
|
|
|
reader.close() |
|
|
|
|
|
return frames |
|
|
|
|
|
|
|
|
def points_padding(points): |
|
|
padding = torch.ones_like(points)[..., 0:1] |
|
|
points = torch.cat([points, padding], dim=-1) |
|
|
return points |
|
|
|
|
|
|
|
|
def np_points_padding(points): |
|
|
padding = np.ones_like(points)[..., 0:1] |
|
|
points = np.concatenate([points, padding], axis=-1) |
|
|
return points |
|
|
|
|
|
|
|
|
def txt_interpolation(input_list, n, mode='smooth'): |
|
|
x = np.linspace(0, 1, len(input_list)) |
|
|
if mode == 'smooth': |
|
|
f = UnivariateSpline(x, input_list, k=3) |
|
|
elif mode == 'linear': |
|
|
f = interp1d(x, input_list) |
|
|
else: |
|
|
raise KeyError(f"Invalid txt interpolation mode: {mode}") |
|
|
xnew = np.linspace(0, 1, n) |
|
|
ynew = f(xnew) |
|
|
return ynew |
|
|
|
|
|
|
|
|
def traj_map(traj_type): |
|
|
|
|
|
if traj_type == "free1": |
|
|
cam_traj = "free" |
|
|
x_offset = 0.0 |
|
|
y_offset = 0.0 |
|
|
z_offset = 0.0 |
|
|
d_theta = -15.0 |
|
|
d_phi = 45.0 |
|
|
d_r = 1.6 |
|
|
elif traj_type == "free2": |
|
|
cam_traj = "free" |
|
|
x_offset = -0.05 |
|
|
y_offset = 0.0 |
|
|
z_offset = 0.0 |
|
|
d_theta = 0.0 |
|
|
d_phi = -60.0 |
|
|
d_r = 1.0 |
|
|
elif traj_type == "free3": |
|
|
cam_traj = "free" |
|
|
x_offset = -0.25 |
|
|
y_offset = 0.0 |
|
|
z_offset = 0.0 |
|
|
d_theta = 0.0 |
|
|
d_phi = 0.0 |
|
|
d_r = 1.7 |
|
|
elif traj_type == "free4": |
|
|
cam_traj = "free" |
|
|
x_offset = 0.0 |
|
|
y_offset = 0.0 |
|
|
z_offset = 0.0 |
|
|
d_theta = -15.0 |
|
|
d_phi = -60.0 |
|
|
d_r = 0.75 |
|
|
elif traj_type == "free5": |
|
|
cam_traj = "free" |
|
|
x_offset = 0.0 |
|
|
y_offset = 0.0 |
|
|
z_offset = 0.0 |
|
|
d_theta = -15.0 |
|
|
d_phi = -120.0 |
|
|
d_r = 1.6 |
|
|
elif traj_type == "swing1": |
|
|
cam_traj = "swing1" |
|
|
x_offset = 0.0 |
|
|
y_offset = 0.0 |
|
|
z_offset = 0.0 |
|
|
d_theta = 0.0 |
|
|
d_phi = 0.0 |
|
|
d_r = 1.0 |
|
|
elif traj_type == "swing2": |
|
|
cam_traj = "swing2" |
|
|
x_offset = 0.0 |
|
|
y_offset = 0.0 |
|
|
z_offset = 0.0 |
|
|
d_theta = 0.0 |
|
|
d_phi = 0.0 |
|
|
d_r = 1.0 |
|
|
elif traj_type == "orbit": |
|
|
cam_traj = "free" |
|
|
x_offset = 0.0 |
|
|
y_offset = 0.0 |
|
|
z_offset = 0.0 |
|
|
d_theta = 0.0 |
|
|
d_phi = -360.0 |
|
|
d_r = 1.0 |
|
|
else: |
|
|
raise NotImplementedError |
|
|
return cam_traj, x_offset, y_offset, z_offset, d_theta, d_phi, d_r |
|
|
|
|
|
|
|
|
def set_initial_camera(start_elevation, radius): |
|
|
c2w_0 = torch.tensor([[1, 0, 0, 0], |
|
|
[0, 1, 0, 0], |
|
|
[0, 0, 1, -radius], |
|
|
[0, 0, 0, 1]], dtype=torch.float32) |
|
|
elevation_rad = np.deg2rad(start_elevation) |
|
|
R_elevation = torch.tensor([[1, 0, 0, 0], |
|
|
[0, np.cos(-elevation_rad), -np.sin(-elevation_rad), 0], |
|
|
[0, np.sin(-elevation_rad), np.cos(-elevation_rad), 0], |
|
|
[0, 0, 0, 1]], dtype=torch.float32) |
|
|
c2w_0 = R_elevation @ c2w_0 |
|
|
w2c_0 = c2w_0.inverse() |
|
|
|
|
|
return w2c_0, c2w_0 |
|
|
|
|
|
|
|
|
def build_cameras(cam_traj, w2c_0, c2w_0, intrinsic, nframe, focal_length, |
|
|
d_theta, d_phi, d_r, radius, x_offset, y_offset, z_offset): |
|
|
|
|
|
|
|
|
if intrinsic.ndim == 2: |
|
|
intrinsic = intrinsic[None].repeat(nframe, 1, 1) |
|
|
|
|
|
c2ws = [c2w_0] |
|
|
w2cs = [w2c_0] |
|
|
d_thetas, d_phis, d_rs = [], [], [] |
|
|
x_offsets, y_offsets, z_offsets = [], [], [] |
|
|
if cam_traj == "free": |
|
|
for i in range(nframe - 1): |
|
|
coef = (i + 1) / (nframe - 1) |
|
|
d_thetas.append(d_theta * coef) |
|
|
d_phis.append(d_phi * coef) |
|
|
d_rs.append(coef * d_r + (1 - coef) * 1.0) |
|
|
x_offsets.append(radius * x_offset * ((i + 1) / nframe)) |
|
|
y_offsets.append(radius * y_offset * ((i + 1) / nframe)) |
|
|
z_offsets.append(radius * z_offset * ((i + 1) / nframe)) |
|
|
elif cam_traj == "swing1": |
|
|
phis__ = [0, -5, -25, -30, -20, -8, 0] |
|
|
thetas__ = [0, -8, -12, -20, -17, -12, -5, -2, 1, 5, 3, 1, 0] |
|
|
rs__ = [0, 0.2] |
|
|
d_phis = txt_interpolation(phis__, nframe, mode='smooth') |
|
|
d_phis[0] = phis__[0] |
|
|
d_phis[-1] = phis__[-1] |
|
|
d_thetas = txt_interpolation(thetas__, nframe, mode='smooth') |
|
|
d_thetas[0] = thetas__[0] |
|
|
d_thetas[-1] = thetas__[-1] |
|
|
d_rs = txt_interpolation(rs__, nframe, mode='linear') |
|
|
d_rs = 1.0 + d_rs |
|
|
elif cam_traj == "swing2": |
|
|
phis__ = [0, 5, 25, 30, 20, 10, 0] |
|
|
thetas__ = [0, -5, -14, -11, 0, 1, 5, 3, 0] |
|
|
rs__ = [0, -0.03, -0.1, -0.2, -0.17, -0.1, 0] |
|
|
d_phis = txt_interpolation(phis__, nframe, mode='smooth') |
|
|
d_phis[0] = phis__[0] |
|
|
d_phis[-1] = phis__[-1] |
|
|
d_thetas = txt_interpolation(thetas__, nframe, mode='smooth') |
|
|
d_thetas[0] = thetas__[0] |
|
|
d_thetas[-1] = thetas__[-1] |
|
|
d_rs = txt_interpolation(rs__, nframe, mode='smooth') |
|
|
d_rs = 1.0 + d_rs |
|
|
else: |
|
|
raise NotImplementedError("Unknown trajectory type...") |
|
|
|
|
|
for i in range(nframe - 1): |
|
|
d_theta_rad = np.deg2rad(d_thetas[i]) |
|
|
R_theta = torch.tensor([[1, 0, 0, 0], |
|
|
[0, np.cos(d_theta_rad), -np.sin(d_theta_rad), 0], |
|
|
[0, np.sin(d_theta_rad), np.cos(d_theta_rad), 0], |
|
|
[0, 0, 0, 1]], dtype=torch.float32) |
|
|
d_phi_rad = np.deg2rad(d_phis[i]) |
|
|
R_phi = torch.tensor([[np.cos(d_phi_rad), 0, np.sin(d_phi_rad), 0], |
|
|
[0, 1, 0, 0], |
|
|
[-np.sin(d_phi_rad), 0, np.cos(d_phi_rad), 0], |
|
|
[0, 0, 0, 1]], dtype=torch.float32) |
|
|
c2w_1 = R_phi @ R_theta @ c2w_0 |
|
|
if i < len(x_offsets) and i < len(y_offsets) and i < len(z_offsets): |
|
|
c2w_1[:3, -1] += torch.tensor([x_offsets[i], y_offsets[i], z_offsets[i]]) |
|
|
c2w_1[:3, -1] *= d_rs[i] |
|
|
w2c_1 = c2w_1.inverse() |
|
|
c2ws.append(c2w_1) |
|
|
w2cs.append(w2c_1) |
|
|
|
|
|
intrinsic[i + 1, :2, :2] = intrinsic[i + 1, :2, :2] * focal_length * ((i + 1) / nframe) + \ |
|
|
intrinsic[i + 1, :2, :2] * ((nframe - (i + 1)) / nframe) |
|
|
|
|
|
w2cs = torch.stack(w2cs, dim=0) |
|
|
c2ws = torch.stack(c2ws, dim=0) |
|
|
|
|
|
return w2cs, c2ws, intrinsic |
|
|
|