| import numpy as np | |
| def align_video_depth(depth_list, INFER_LEN, KEYFRAMES, OVERLAP): | |
| depth_list = [depth.squeeze().cpu().numpy() for depth in depth_list] | |
| depth_list_aligned = [] | |
| ref_align = [] | |
| for i in range(0, len(depth_list)): | |
| if i == 0: | |
| depth_list_aligned.extend(depth_list[i][j] for j in range(INFER_LEN)) | |
| for kf_id in KEYFRAMES: | |
| ref_align.append(depth_list[0][kf_id]) | |
| if i != 0: | |
| cur_align = [] | |
| for k in range(len(KEYFRAMES)): | |
| cur_align.append(depth_list[i][k]) | |
| scale, shift = compute_scale_and_shift_full(np.concatenate(cur_align), | |
| np.concatenate(ref_align), | |
| np.concatenate(np.ones_like(ref_align)==1)) | |
| new_depth = depth_list[i] * scale + shift | |
| depth_list_aligned.extend(new_depth[j] for j in range(OVERLAP, INFER_LEN)) | |
| ref_align = ref_align[:1] | |
| for kf_id in KEYFRAMES[1:]: | |
| ref_align.append(new_depth[kf_id]) | |
| return np.stack(depth_list_aligned, axis=0) | |
| def compute_scale_and_shift_full(prediction, target, mask): | |
| prediction = prediction.astype(np.float32) | |
| target = target.astype(np.float32) | |
| mask = mask.astype(np.float32) | |
| a_00 = np.sum(mask * prediction * prediction) | |
| a_01 = np.sum(mask * prediction) | |
| a_11 = np.sum(mask) | |
| b_0 = np.sum(mask * prediction * target) | |
| b_1 = np.sum(mask * target) | |
| x_0 = 1 | |
| x_1 = 0 | |
| det = a_00 * a_11 - a_01 * a_01 | |
| if det != 0: | |
| x_0 = (a_11 * b_0 - a_01 * b_1) / det | |
| x_1 = (-a_01 * b_0 + a_00 * b_1) / det | |
| return x_0, x_1 |