Spaces:
Build error
Build error
| import numpy as np | |
| import torch | |
| import fusion | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| def read_calib(calib_path): | |
| """ | |
| Modify from https://github.com/utiasSTARS/pykitti/blob/d3e1bb81676e831886726cc5ed79ce1f049aef2c/pykitti/utils.py#L68 | |
| :param calib_path: Path to a calibration text file. | |
| :return: dict with calibration matrices. | |
| """ | |
| calib_all = {} | |
| with open(calib_path, "r") as f: | |
| for line in f.readlines(): | |
| if line == "\n": | |
| break | |
| key, value = line.split(":", 1) | |
| calib_all[key] = np.array([float(x) for x in value.split()]) | |
| # reshape matrices | |
| calib_out = {} | |
| # 3x4 projection matrix for left camera | |
| calib_out["P2"] = calib_all["P2"].reshape(3, 4) | |
| calib_out["Tr"] = np.identity(4) # 4x4 matrix | |
| calib_out["Tr"][:3, :4] = calib_all["Tr"].reshape(3, 4) | |
| return calib_out | |
| def vox2pix(cam_E, cam_k, | |
| vox_origin, voxel_size, | |
| img_W, img_H, | |
| scene_size): | |
| """ | |
| compute the 2D projection of voxels centroids | |
| Parameters: | |
| ---------- | |
| cam_E: 4x4 | |
| =camera pose in case of NYUv2 dataset | |
| =Transformation from camera to lidar coordinate in case of SemKITTI | |
| cam_k: 3x3 | |
| camera intrinsics | |
| vox_origin: (3,) | |
| world(NYU)/lidar(SemKITTI) cooridnates of the voxel at index (0, 0, 0) | |
| img_W: int | |
| image width | |
| img_H: int | |
| image height | |
| scene_size: (3,) | |
| scene size in meter: (51.2, 51.2, 6.4) for SemKITTI and (4.8, 4.8, 2.88) for NYUv2 | |
| Returns | |
| ------- | |
| projected_pix: (N, 2) | |
| Projected 2D positions of voxels | |
| fov_mask: (N,) | |
| Voxels mask indice voxels inside image's FOV | |
| pix_z: (N,) | |
| Voxels'distance to the sensor in meter | |
| """ | |
| # Compute the x, y, z bounding of the scene in meter | |
| vol_bnds = np.zeros((3, 2)) | |
| vol_bnds[:, 0] = vox_origin | |
| vol_bnds[:, 1] = vox_origin + np.array(scene_size) | |
| # Compute the voxels centroids in lidar cooridnates | |
| vol_dim = np.ceil((vol_bnds[:, 1] - vol_bnds[:, 0]) / | |
| voxel_size).copy(order='C').astype(int) | |
| xv, yv, zv = np.meshgrid( | |
| range(vol_dim[0]), | |
| range(vol_dim[1]), | |
| range(vol_dim[2]), | |
| indexing='ij' | |
| ) | |
| vox_coords = np.concatenate([ | |
| xv.reshape(1, -1), | |
| yv.reshape(1, -1), | |
| zv.reshape(1, -1) | |
| ], axis=0).astype(int).T | |
| # Project voxels'centroid from lidar coordinates to camera coordinates | |
| cam_pts = fusion.TSDFVolume.vox2world(vox_origin, vox_coords, voxel_size) | |
| cam_pts = fusion.rigid_transform(cam_pts, cam_E) | |
| # Project camera coordinates to pixel positions | |
| projected_pix = fusion.TSDFVolume.cam2pix(cam_pts, cam_k) | |
| pix_x, pix_y = projected_pix[:, 0], projected_pix[:, 1] | |
| # Eliminate pixels outside view frustum | |
| pix_z = cam_pts[:, 2] | |
| fov_mask = np.logical_and(pix_x >= 0, | |
| np.logical_and(pix_x < img_W, | |
| np.logical_and(pix_y >= 0, | |
| np.logical_and(pix_y < img_H, | |
| pix_z > 0)))) | |
| return torch.from_numpy(projected_pix), torch.from_numpy(fov_mask), torch.from_numpy(pix_z) | |
| def get_grid_coords(dims, resolution): | |
| """ | |
| :param dims: the dimensions of the grid [x, y, z] (i.e. [256, 256, 32]) | |
| :return coords_grid: is the center coords of voxels in the grid | |
| """ | |
| g_xx = np.arange(0, dims[0] + 1) | |
| g_yy = np.arange(0, dims[1] + 1) | |
| sensor_pose = 10 | |
| g_zz = np.arange(0, dims[2] + 1) | |
| # Obtaining the grid with coords... | |
| xx, yy, zz = np.meshgrid(g_xx[:-1], g_yy[:-1], g_zz[:-1]) | |
| coords_grid = np.array([xx.flatten(), yy.flatten(), zz.flatten()]).T | |
| coords_grid = coords_grid.astype(np.float) | |
| coords_grid = (coords_grid * resolution) + resolution / 2 | |
| temp = np.copy(coords_grid) | |
| temp[:, 0] = coords_grid[:, 1] | |
| temp[:, 1] = coords_grid[:, 0] | |
| coords_grid = np.copy(temp) | |
| return coords_grid | |
| def get_projections(img_W, img_H): | |
| scale_3ds = [1, 2] | |
| data = {} | |
| for scale_3d in scale_3ds: | |
| scene_size = (4.8, 4.8, 2.88) | |
| vox_origin = np.array([-1.54591799, 0.8907361, -0.05]) | |
| voxel_size = 0.08 | |
| calib = read_calib("/monoscene/MonoScene/calib.txt") | |
| cam_k = np.array([[518.8579, 0, 320], [0, 518.8579, 240], [0, 0, 1]]) | |
| cam_pose = np.asarray([[9.6699458e-01, 4.2662762e-02, 2.5120059e-01, 0.0000000e+00], | |
| [-2.5147417e-01, 1.0867463e-03, | |
| 9.6786356e-01, 0.0000000e+00], | |
| [4.1018680e-02, -9.9908894e-01, | |
| 1.1779292e-02, 1.1794727e+00], | |
| [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00]]) | |
| T_velo_2_cam = np.linalg.inv(cam_pose) | |
| # compute the 3D-2D mapping | |
| projected_pix, fov_mask, pix_z = vox2pix( | |
| T_velo_2_cam, | |
| cam_k, | |
| vox_origin, | |
| voxel_size * scale_3d, | |
| img_W, | |
| img_H, | |
| scene_size, | |
| ) | |
| data["projected_pix_{}".format(scale_3d)] = projected_pix | |
| data["pix_z_{}".format(scale_3d)] = pix_z | |
| data["fov_mask_{}".format(scale_3d)] = fov_mask | |
| return data | |
| def majority_pooling(grid, k_size=2): | |
| result = np.zeros( | |
| (grid.shape[0] // k_size, grid.shape[1] // | |
| k_size, grid.shape[2] // k_size) | |
| ) | |
| for xx in range(0, int(np.floor(grid.shape[0] / k_size))): | |
| for yy in range(0, int(np.floor(grid.shape[1] / k_size))): | |
| for zz in range(0, int(np.floor(grid.shape[2] / k_size))): | |
| sub_m = grid[ | |
| (xx * k_size): (xx * k_size) + k_size, | |
| (yy * k_size): (yy * k_size) + k_size, | |
| (zz * k_size): (zz * k_size) + k_size, | |
| ] | |
| unique, counts = np.unique(sub_m, return_counts=True) | |
| if True in ((unique != 0) & (unique != 255)): | |
| # Remove counts with 0 and 255 | |
| counts = counts[((unique != 0) & (unique != 255))] | |
| unique = unique[((unique != 0) & (unique != 255))] | |
| else: | |
| if True in (unique == 0): | |
| counts = counts[(unique != 255)] | |
| unique = unique[(unique != 255)] | |
| value = unique[np.argmax(counts)] | |
| result[xx, yy, zz] = value | |
| return result | |
| def get_grid_coords(dims, resolution): | |
| """ | |
| :param dims: the dimensions of the grid [x, y, z] (i.e. [256, 256, 32]) | |
| :return coords_grid: is the center coords of voxels in the grid | |
| """ | |
| g_xx = np.arange(0, dims[0] + 1) | |
| g_yy = np.arange(0, dims[1] + 1) | |
| g_zz = np.arange(0, dims[2] + 1) | |
| # Obtaining the grid with coords... | |
| xx, yy, zz = np.meshgrid(g_xx[:-1], g_yy[:-1], g_zz[:-1]) | |
| coords_grid = np.array([xx.flatten(), yy.flatten(), zz.flatten()]).T | |
| coords_grid = coords_grid.astype(np.float) | |
| coords_grid = (coords_grid * resolution) + resolution / 2 | |
| temp = np.copy(coords_grid) | |
| temp[:, 0] = coords_grid[:, 1] | |
| temp[:, 1] = coords_grid[:, 0] | |
| coords_grid = np.copy(temp) | |
| return coords_grid | |
| def draw( | |
| voxels, | |
| cam_pose, | |
| vox_origin, | |
| voxel_size=0.08, | |
| d=0.75, # 0.75m - determine the size of the mesh representing the camera | |
| ): | |
| # Compute the coordinates of the mesh representing camera | |
| y = d * 480 / (2 * 518.8579) | |
| x = d * 640 / (2 * 518.8579) | |
| tri_points = np.array( | |
| [ | |
| [0, 0, 0], | |
| [x, y, d], | |
| [-x, y, d], | |
| [-x, -y, d], | |
| [x, -y, d], | |
| ] | |
| ) | |
| tri_points = np.hstack([tri_points, np.ones((5, 1))]) | |
| tri_points = (cam_pose @ tri_points.T).T | |
| x = tri_points[:, 0] - vox_origin[0] | |
| y = tri_points[:, 1] - vox_origin[1] | |
| z = tri_points[:, 2] - vox_origin[2] | |
| triangles = [ | |
| (0, 1, 2), | |
| (0, 1, 4), | |
| (0, 3, 4), | |
| (0, 2, 3), | |
| ] | |
| # Compute the voxels coordinates | |
| grid_coords = get_grid_coords( | |
| [voxels.shape[0], voxels.shape[2], voxels.shape[1]], voxel_size | |
| ) | |
| # Attach the predicted class to every voxel | |
| grid_coords = np.vstack( | |
| (grid_coords.T, np.moveaxis(voxels, [0, 1, 2], [0, 2, 1]).reshape(-1)) | |
| ).T | |
| # Remove empty and unknown voxels | |
| occupied_voxels = grid_coords[(grid_coords[:, 3] > 0) & (grid_coords[:, 3] < 255)] | |
| colors = np.array( | |
| [ | |
| [22, 191, 206, 255], | |
| [214, 38, 40, 255], | |
| [43, 160, 43, 255], | |
| [158, 216, 229, 255], | |
| [114, 158, 206, 255], | |
| [204, 204, 91, 255], | |
| [255, 186, 119, 255], | |
| [147, 102, 188, 255], | |
| [30, 119, 181, 255], | |
| [188, 188, 33, 255], | |
| [255, 127, 12, 255], | |
| [196, 175, 214, 255], | |
| [153, 153, 153, 255], | |
| ] | |
| ).astype(np.uint8) | |
| pts_colors = [ | |
| f'rgb({colors[int(i)][0]}, {colors[int(i)][1]}, {colors[int(i)][2]})' for i in occupied_voxels[:, 3]] | |
| out_fov_colors = [ | |
| f'rgb({colors[int(i)][0]//3*2}, {colors[int(i)][1]//3*2}, {colors[int(i)][2]//3*2})' for i in occupied_voxels[:, 3]] | |
| pts_colors = pts_colors + out_fov_colors | |
| fig = go.Figure(data=[go.Scatter3d(x=occupied_voxels[:, 0], y=occupied_voxels[:, 1], z=occupied_voxels[:, 2], mode='markers', | |
| marker=dict( | |
| size=4, | |
| color=pts_colors, # set color to an array/list of desired values | |
| opacity=1.0, | |
| symbol='square' | |
| ))]) | |
| fig.update_layout( | |
| scene=dict( | |
| aspectmode='data', | |
| yaxis=dict(visible=False, showticklabels=False), | |
| bgcolor="black", | |
| ), | |
| ) | |
| return fig | |