chenming-wu
/

LiDAR-Perfect-Depth

Depth Estimation

monocular-depth

pixel-perfect-depth

Model card Files Files and versions

LiDAR-Perfect-Depth / code /ppd /utils /align_vda.py

chenming-wu's picture

code

436b829 verified 2 days ago

history blame contribute delete

1.69 kB

	import numpy as np

	def align_video_depth(depth_list, INFER_LEN, KEYFRAMES, OVERLAP):
	    """Stitch per-window depth predictions into one scale-consistent sequence.

	    Each entry of ``depth_list`` is converted with ``.squeeze().cpu().numpy()``
	    (so entries are presumably torch tensors -- confirm against the caller)
	    and is then indexed by frame along its first axis.

	    The first window is taken as-is: its first ``INFER_LEN`` frames are
	    emitted and its frames at the ``KEYFRAMES`` indices become the alignment
	    reference. For every later window, the first ``len(KEYFRAMES)`` frames
	    are fitted to the current reference with a single scale and shift (via
	    ``compute_scale_and_shift_full``), the whole window is rescaled, and
	    frames ``OVERLAP .. INFER_LEN - 1`` are emitted. The reference is then
	    rebuilt from its own first entry plus the rescaled window's frames at
	    ``KEYFRAMES[1:]``.

	    Args:
	        depth_list: Sequence of per-window depth predictions.
	        INFER_LEN: Number of frames read from each window.
	        KEYFRAMES: Frame indices (within a window) used as references.
	        OVERLAP: Number of leading frames skipped in every window after
	            the first.

	    Returns:
	        A NumPy array stacking all emitted frames along axis 0.
	    """
	    windows = [chunk.squeeze().cpu().numpy() for chunk in depth_list]
	    num_keyframes = len(KEYFRAMES)

	    aligned_frames = []
	    ref_align = []

	    for window_idx, window in enumerate(windows):
	        if window_idx == 0:
	            # The first window defines the target scale; nothing to fit yet.
	            aligned_frames.extend(window[j] for j in range(INFER_LEN))
	            ref_align.extend(window[kf_id] for kf_id in KEYFRAMES)
	            continue

	        # Leading frames of this window are matched against the references.
	        cur_align = [window[k] for k in range(num_keyframes)]
	        all_valid = np.ones_like(ref_align) == 1  # every pixel contributes
	        scale, shift = compute_scale_and_shift_full(
	            np.concatenate(cur_align),
	            np.concatenate(ref_align),
	            np.concatenate(all_valid),
	        )

	        new_depth = window * scale + shift
	        aligned_frames.extend(new_depth[j] for j in range(OVERLAP, INFER_LEN))

	        # Keep the very first reference frame; refresh the remaining ones
	        # from the window that was just aligned.
	        ref_align = ref_align[:1] + [new_depth[kf_id] for kf_id in KEYFRAMES[1:]]

	    return np.stack(aligned_frames, axis=0)


	def compute_scale_and_shift_full(prediction, target, mask):
	    """Fit ``scale * prediction + shift`` to ``target`` by masked least squares.

	    All three inputs are cast to float32 and the 2x2 normal equations of the
	    mask-weighted least-squares problem are solved in closed form.

	    Args:
	        prediction: Array of values to be rescaled.
	        target: Array of reference values, broadcast-compatible with
	            ``prediction``.
	        mask: Array of per-element weights (e.g. a boolean validity mask).

	    Returns:
	        ``(scale, shift)``. When the system determinant is exactly zero the
	        identity transform ``(1, 0)`` is returned instead.
	    """
	    pred = prediction.astype(np.float32)
	    ref = target.astype(np.float32)
	    weight = mask.astype(np.float32)

	    # Shared factor of several sums below.
	    weighted_pred = weight * pred

	    # Entries of the symmetric system matrix [[s_pp, s_p], [s_p, s_w]].
	    s_pp = np.sum(weighted_pred * pred)
	    s_p = np.sum(weighted_pred)
	    s_w = np.sum(weight)

	    # Right-hand side.
	    s_pt = np.sum(weighted_pred * ref)
	    s_t = np.sum(weight * ref)

	    det = s_pp * s_w - s_p * s_p
	    if det == 0:
	        # Singular system: fall back to the identity transform.
	        return 1, 0

	    # Cramer's rule for the 2x2 system.
	    scale = (s_w * s_pt - s_p * s_t) / det
	    shift = (-s_p * s_pt + s_pp * s_t) / det
	    return scale, shift