Spaces:
Sleeping
Sleeping
| """Miscellaneous utilities.""" | |
| import cv2 | |
| import random | |
| import matplotlib | |
| import matplotlib.pyplot as plt | |
| import meshcat | |
| import meshcat.geometry as g | |
| import meshcat.transformations as mtf | |
| import PIL | |
| import yaml | |
| import numpy as np | |
| from transforms3d import euler | |
| import pybullet as p | |
| import kornia | |
| from omegaconf import OmegaConf | |
| import os | |
| import torch | |
| import torchvision | |
| # ----------------------------------------------------------------------------- | |
| # HEIGHTMAP UTILS | |
| # ----------------------------------------------------------------------------- | |
def get_heightmap(points, colors, bounds, pixel_size):
    """Project a 3D pointcloud into a top-down (z-axis) orthographic heightmap.

    Args:
        points: HxWx3 float array of 3D points in world coordinates.
        colors: HxWx3 uint8 array of values in range 0-255 aligned with points.
        bounds: 3x2 float array of values (rows: X,Y,Z; columns: min,max)
            defining the region in 3D space rendered into the heightmap.
        pixel_size: float defining the size of each pixel in meters.

    Returns:
        heightmap: HxW float array of height (from lower z-bound) in meters.
        colormap: HxWx3 uint8 array of backprojected color aligned with heightmap.
    """
    width = int(np.round((bounds[0, 1] - bounds[0, 0]) / pixel_size))
    height = int(np.round((bounds[1, 1] - bounds[1, 0]) / pixel_size))
    heightmap = np.zeros((height, width), dtype=np.float32)
    colormap = np.zeros((height, width, colors.shape[-1]), dtype=np.uint8)

    # Keep only the points that fall inside the bounding volume.
    in_bounds = (
        (points[..., 0] >= bounds[0, 0]) & (points[..., 0] < bounds[0, 1])
        & (points[..., 1] >= bounds[1, 0]) & (points[..., 1] < bounds[1, 1])
        & (points[..., 2] >= bounds[2, 0]) & (points[..., 2] < bounds[2, 1]))
    points = points[in_bounds]
    colors = colors[in_bounds]

    # Ascending z-sort so that later (higher) points overwrite earlier ones in
    # the scatter assignments below, emulating a z-buffer.
    order = np.argsort(points[:, -1])
    points, colors = points[order], colors[order]

    px = np.int32(np.floor((points[:, 0] - bounds[0, 0]) / pixel_size))
    py = np.int32(np.floor((points[:, 1] - bounds[1, 0]) / pixel_size))
    px = np.clip(px, 0, width - 1)
    py = np.clip(py, 0, height - 1)
    heightmap[py, px] = points[:, 2] - bounds[2, 0]
    for channel in range(colors.shape[-1]):
        colormap[py, px, channel] = colors[:, channel]
    return heightmap, colormap
def get_pointcloud(depth, intrinsics):
    """Back-project a perspective depth image into camera-frame 3D points.

    Args:
        depth: HxW float array of perspective depth in meters.
        intrinsics: 3x3 float array of camera intrinsics matrix.

    Returns:
        points: HxWx3 float array of 3D points in camera coordinates.
    """
    height, width = depth.shape
    cols, rows = np.meshgrid(np.linspace(0, width - 1, width),
                             np.linspace(0, height - 1, height))
    # Pinhole model: shift by principal point, scale by depth over focal length.
    x = (cols - intrinsics[0, 2]) * (depth / intrinsics[0, 0])
    y = (rows - intrinsics[1, 2]) * (depth / intrinsics[1, 1])
    return np.float32([x, y, depth]).transpose(1, 2, 0)
def transform_pointcloud(points, transform):
    """Apply a rigid transformation to a 3D pointcloud, in place.

    Args:
        points: HxWx3 float array of 3D points in camera coordinates.
            Modified in place.
        transform: 4x4 float array representing a rigid transformation matrix.

    Returns:
        points: the same HxWx3 array, now holding the transformed points.
    """
    # Homogeneous copy (w=1) so each output axis can be written back into
    # `points` without corrupting the remaining dot products.
    homogeneous = np.pad(points.copy(), ((0, 0), (0, 0), (0, 1)),
                         'constant', constant_values=1)
    for axis in range(3):
        points[..., axis] = np.sum(transform[axis, :] * homogeneous, axis=-1)
    return points
def reconstruct_heightmaps(color, depth, configs, bounds, pixel_size):
    """Reconstruct top-down heightmap views from multiple 3D pointclouds."""
    heightmaps, colormaps = [], []
    # Avoid shadowing the parameters inside the loop: use per-view names.
    for view_color, view_depth, config in zip(color, depth, configs):
        intrinsics = np.array(config['intrinsics']).reshape(3, 3)
        xyz = get_pointcloud(view_depth, intrinsics)
        # Camera pose -> 4x4 world-from-camera transform.
        position = np.array(config['position']).reshape(3, 1)
        rotation = np.array(
            p.getMatrixFromQuaternion(config['rotation'])).reshape(3, 3)
        transform = np.eye(4)
        transform[:3, :] = np.hstack((rotation, position))
        xyz = transform_pointcloud(xyz, transform)
        heightmap, colormap = get_heightmap(xyz, view_color, bounds, pixel_size)
        heightmaps.append(heightmap)
        colormaps.append(colormap)
    return heightmaps, colormaps
def pix_to_xyz(pixel, height, bounds, pixel_size, skip_height=False):
    """Convert a heightmap pixel location (u, v) to a 3D world position."""
    u, v = pixel
    x = bounds[0, 0] + v * pixel_size
    y = bounds[1, 0] + u * pixel_size
    # z is the lower bound plus the stored height, unless the caller asked to
    # skip the height lookup entirely.
    z = 0.0 if skip_height else bounds[2, 0] + height[u, v]
    return (x, y, z)
def xyz_to_pix(position, bounds, pixel_size):
    """Convert a 3D world position to a heightmap pixel location (u, v)."""
    offset_x = position[0] - bounds[0, 0]
    offset_y = position[1] - bounds[1, 0]
    return (int(np.round(offset_y / pixel_size)),
            int(np.round(offset_x / pixel_size)))
def unproject_vectorized(uv_coordinates, depth_values,
                         intrinsic,
                         distortion):
    """Vectorized version of unproject(), for N points.

    Args:
        uv_coordinates: pixel coordinates to unproject, shape (n, 2).
        depth_values: depth values corresponding index-wise to the
            uv_coordinates, shape (n,).
        intrinsic: array of shape (3, 3); typically the return value of
            intrinsics_to_matrix.
        distortion: camera distortion parameters of shape (5,).

    Returns:
        xyz coordinates in camera frame, shape (n, 3).
    """
    distortion_params = np.array(distortion)  # shape [5]
    # Undistort into normalized image coordinates; squeeze to shape [N, 2].
    normalized = cv2.undistortPoints(
        uv_coordinates.reshape((-1, 1, 2)), intrinsic,
        distortion_params).squeeze()
    # Scale normalized coordinates by depth to recover metric x, y.
    xs = normalized[:, 0] * depth_values
    ys = normalized[:, 1] * depth_values
    return np.vstack((xs, ys, depth_values)).T
def unproject_depth_vectorized(im_depth, depth_dist,
                               camera_mtx,
                               camera_dist):
    """Unproject a depth image into a 3D point cloud, using calibration.

    Args:
        im_depth: raw depth image, pre-calibration, shape (height, width).
        depth_dist: depth distortion parameters of shape (8,); only the first
            two (offset, gain) are used here.
        camera_mtx: intrinsics matrix of shape (3, 3); typically the return
            value of intrinsics_to_matrix.
        camera_dist: camera distortion parameters of shape (5,).

    Returns:
        numpy array of shape (H*W, 3); each row is an xyz coordinate.
    """
    h, w = im_depth.shape
    # Pixel grids; each of u_map, v_map has shape [H, W].
    u_map, v_map = np.meshgrid(np.linspace(0, w - 1, w),
                               np.linspace(0, h - 1, h))
    # Linear depth calibration: offset + gain * raw.
    adjusted_depth = depth_dist[0] + im_depth * depth_dist[1]
    # Flatten to [N, 2] pixel coordinates, N = H * W.
    uv_coordinates = np.stack((u_map.reshape(-1), v_map.reshape(-1)), axis=-1)
    return unproject_vectorized(uv_coordinates, adjusted_depth.reshape(-1),
                                camera_mtx, camera_dist)
| # ----------------------------------------------------------------------------- | |
| # MATH UTILS | |
| # ----------------------------------------------------------------------------- | |
def sample_distribution(prob, n_samples=1):
    """Sample index coordinates from an unnormalized probability map."""
    flat = prob.flatten() / np.sum(prob)
    chosen = np.random.choice(
        len(flat), n_samples, p=flat, replace=False)
    # Map flat indices back to multi-dimensional coordinates.
    coords = np.array(np.unravel_index(chosen, prob.shape)).T
    return np.int32(coords.squeeze())
| # ------------------------------------------------------------------------- | |
| # Transformation Helper Functions | |
| # ------------------------------------------------------------------------- | |
def invert(pose):
    """Invert a pose given as a (position, quaternion-xyzw) tuple."""
    return p.invertTransform(pose[0], pose[1])
def multiply(pose0, pose1):
    """Compose two poses, each a (position, quaternion-xyzw) tuple."""
    return p.multiplyTransforms(pose0[0], pose0[1], pose1[0], pose1[1])
def apply(pose, position):
    """Apply a pose (position, quaternion-xyzw) to one or more 3D points.

    Args:
        pose: (translation 3-vector, quaternion in xyzw order) tuple.
        position: a 3-vector, or an array whose flattened layout is 3xN.

    Returns:
        Tuple of transformed coordinates, reshaped like the input.
    """
    position = np.float32(position)
    position_shape = position.shape
    # Work in 3xN column form regardless of input shape.
    position = np.float32(position).reshape(3, -1)
    rotation = np.float32(p.getMatrixFromQuaternion(pose[1])).reshape(3, 3)
    translation = np.float32(pose[0]).reshape(3, 1)
    position = rotation @ position + translation
    return tuple(position.reshape(position_shape))
def eulerXYZ_to_quatXYZW(rotation):  # pylint: disable=invalid-name
    """Abstraction for converting from a 3-parameter rotation to a quaternion.

    This will help us easily switch which rotation parameterization we use.
    Quaternion should be in xyzw order for pybullet.

    Args:
        rotation: a 3-parameter rotation, in xyz order, tuple of 3 floats.

    Returns:
        quaternion, in xyzw order, tuple of 4 floats.
    """
    # transforms3d's 'szxy' convention takes (z, x, y) and returns wxyz.
    w, x, y, z = euler.euler2quat(rotation[2], rotation[0], rotation[1],
                                  axes='szxy')
    return (x, y, z, w)
def quatXYZW_to_eulerXYZ(quaternion_xyzw):  # pylint: disable=invalid-name
    """Abstraction for converting from a quaternion to a 3-parameter rotation.

    This will help us easily switch which rotation parameterization we use.
    Quaternion should be in xyzw order for pybullet.

    Args:
        quaternion_xyzw: in xyzw order, tuple of 4 floats.

    Returns:
        rotation: a 3-parameter rotation, in xyz order, tuple of 3 floats.
    """
    x, y, z, w = quaternion_xyzw
    # transforms3d's 'szxy' convention returns angles in (z, x, y) order.
    rot_z, rot_x, rot_y = euler.quat2euler(np.array([w, x, y, z]), axes='szxy')
    return (rot_x, rot_y, rot_z)
def apply_transform(transform_to_from, points_from):
    r"""Transforms points (3D) into a new frame.

    Using transform_to_from notation.

    Args:
        transform_to_from: numpy.ndarray of shape [4,4] or [B,4,4], SE3.
        points_from: numpy.ndarray of shape [3,N] or [B,3,N].

    Returns:
        points_to: numpy.ndarray of shape [3,N] or [B,3,N].
    """
    num_points = points_from.shape[-1]
    if transform_to_from.ndim == 2:
        # Non-batched: promote to homogeneous coordinates, transform, truncate.
        homogeneous = np.vstack((points_from, np.ones((1, num_points))))  # [4,N]
        return (transform_to_from @ homogeneous)[0:3, :]  # [3,N]
    # Batched case.
    assert transform_to_from.ndim == 3
    batch_size = transform_to_from.shape[0]
    # Homogeneous padding row of ones (the original misleadingly named this
    # `zeros`; the value must be 1 for the translation column to apply).
    pad = np.ones((batch_size, 1, num_points))
    homogeneous = np.concatenate((points_from, pad), axis=1)
    assert homogeneous.shape[1] == 4
    return (transform_to_from @ homogeneous)[:, 0:3, :]
| # ----------------------------------------------------------------------------- | |
| # IMAGE UTILS | |
| # ----------------------------------------------------------------------------- | |
def preprocess(img, dist='transporter'):
    """Pre-process an RGB-D image (subtract mean, divide by std).

    The first 3 channels (color) are rescaled from [0, 255] and standardized;
    the remaining channels (depth) are standardized without rescaling.

    Args:
        img: either a torch.Tensor of shape [B, C, H, W] (cloned before
            normalization, input left untouched) or a numpy array of shape
            [H, W, C] (normalized IN PLACE).
        dist: which normalization statistics to use: 'clip', 'mdetr',
            'franka', or anything else for 'transporter'. Depth statistics
            exist only for 'franka'; all other choices use transporter's.

    Returns:
        The normalized image (same type as the input).
    """
    transporter_color_mean = [0.18877631, 0.18877631, 0.18877631]
    transporter_color_std = [0.07276466, 0.07276466, 0.07276466]
    transporter_depth_mean = 0.00509261
    transporter_depth_std = 0.00903967
    franka_color_mean = [0.622291933, 0.628313992, 0.623031488]
    franka_color_std = [0.168154213, 0.17626014, 0.184527364]
    franka_depth_mean = 0.872146842
    franka_depth_std = 0.195743116
    clip_color_mean = [0.48145466, 0.4578275, 0.40821073]
    clip_color_std = [0.26862954, 0.26130258, 0.27577711]

    # Choose color statistics by distribution name.
    if dist == 'clip':
        color_mean, color_std = clip_color_mean, clip_color_std
    elif dist == 'mdetr':
        color_mean, color_std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    elif dist == 'franka':
        color_mean, color_std = franka_color_mean, franka_color_std
    else:
        color_mean, color_std = transporter_color_mean, transporter_color_std

    # Depth statistics: only 'franka' has its own.
    if dist == 'franka':
        depth_mean, depth_std = franka_depth_mean, franka_depth_std
    else:
        depth_mean, depth_std = transporter_depth_mean, transporter_depth_std

    if isinstance(img, torch.Tensor):
        def cast_shape(stat, img):
            # Broadcast a scalar/3-list statistic to the [B, C, H, W] layout.
            tensor = torch.from_numpy(np.array(stat)).to(device=img.device,
                                                         dtype=img.dtype)
            tensor = tensor.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
            return tensor.repeat(img.shape[0], 1, img.shape[-2], img.shape[-1])

        color_mean = cast_shape(color_mean, img)
        color_std = cast_shape(color_std, img)
        depth_mean = cast_shape(depth_mean, img)
        depth_std = cast_shape(depth_std, img)
        # Clone so the caller's tensor is not modified.
        img = img.clone()
        img[:, :3, :, :] = (img[:, :3, :, :] / 255 - color_mean) / color_std
        img[:, 3:, :, :] = (img[:, 3:, :, :] - depth_mean) / depth_std
    else:
        # NOTE: numpy inputs are normalized in place (original behavior).
        img[:, :, :3] = (img[:, :, :3] / 255 - color_mean) / color_std
        img[:, :, 3:] = (img[:, :, 3:] - depth_mean) / depth_std
    return img
def map_kit_scale(scale):
    """Scale a kit (x, y, z) size triple down by a factor of 10."""
    x, y, z = scale[0], scale[1], scale[2]
    return (x / 10, y / 10, z / 10)
def deprocess(img):
    """Invert `preprocess` (transporter statistics) back to uint8 values.

    NOTE(review): the depth channels are also multiplied by 255 here, which
    is not symmetric with `preprocess` (depth is never divided by 255 there).
    Presumably intentional for visualization — confirm before reuse. The
    input array is modified in place.
    """
    color_mean, color_std = 0.18877631, 0.07276466
    depth_mean, depth_std = 0.00509261, 0.00903967
    img[:, :, :3] = np.uint8((img[:, :, :3] * color_std + color_mean) * 255)
    img[:, :, 3:] = np.uint8((img[:, :, 3:] * depth_std + depth_mean) * 255)
    return img
def get_fused_heightmap(obs, configs, bounds, pix_size):
    """Fuse per-view reconstructed orthographic heightmaps into one view."""
    heightmaps, colormaps = reconstruct_heightmaps(
        obs['color'], obs['depth'], configs, bounds, pix_size)
    colormaps = np.float32(colormaps)
    heightmaps = np.float32(heightmaps)
    # Average colors over the views that actually observed each pixel.
    observed = np.sum(colormaps, axis=3) > 0
    counts = np.sum(observed, axis=0)
    counts[counts == 0] = 1  # avoid division by zero for unseen pixels
    cmap = np.uint8(np.round(np.sum(colormaps, axis=0) / counts[..., None]))
    # Max over views to handle occlusions.
    hmap = np.max(heightmaps, axis=0)
    return cmap, hmap
def get_image_transform(theta, trans, pivot=(0, 0)):
    """Compute a composite 2D rigid transformation matrix (3x3 homogeneous).

    Rotates an image by `theta` (radians) around `pivot` (pixels) and then
    translates by the `trans` vector (pixels).
    """
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # Translate pivot to the origin, rotate-and-translate, translate back.
    to_origin = np.array([[1., 0., -pivot[0]],
                          [0., 1., -pivot[1]],
                          [0., 0., 1.]])
    from_origin = np.array([[1., 0., pivot[0]],
                            [0., 1., pivot[1]],
                            [0., 0., 1.]])
    rigid = np.array([[cos_t, -sin_t, trans[0]],
                      [sin_t, cos_t, trans[1]],
                      [0., 0., 1.]])
    return from_origin @ rigid @ to_origin
def check_transform(image, pixel, transform):
    """Valid transform only if the pixel stays in FoV after the transform."""
    # Pixel is (row, col); the transform operates on (x, y) = (col, row).
    homogeneous = np.float32([pixel[1], pixel[0], 1.]).reshape(3, 1)
    mapped = np.int32(np.round(transform @ homogeneous))[:2].squeeze()
    new_pixel = np.flip(mapped)  # back to (row, col)
    valid = (np.all(new_pixel >= 0)
             and new_pixel[0] < image.shape[0]
             and new_pixel[1] < image.shape[1])
    return valid, new_pixel
def get_se3_from_image_transform(theta, trans, pivot, heightmap, bounds,
                                 pixel_size):
    """Calculate SE3 poses from an image-space rigid transform.

    Given an image augmentation (rotation `theta` about `pivot`, then
    translation `trans`, all in pixel space), returns the world-frame 4x4
    pose of the original pivot and of the transformed pivot.
    """
    # World position under the (rounded) pivot pixel, including its height.
    position_center = pix_to_xyz(
        np.flip(np.int32(np.round(pivot))),
        heightmap,
        bounds,
        pixel_size,
        skip_height=False)
    new_position_center = pix_to_xyz(
        np.flip(np.int32(np.round(pivot + trans))),
        heightmap,
        bounds,
        pixel_size,
        skip_height=True)
    # Don't look up the z height, it might get augmented out of frame.
    new_position_center = (new_position_center[0], new_position_center[1],
                           position_center[2])
    delta_position = np.array(new_position_center) - np.array(position_center)
    t_world_center = np.eye(4)
    t_world_center[0:3, 3] = np.array(position_center)
    t_centernew_center = np.eye(4)
    # NOTE(review): the sign flip on theta presumably converts the image-space
    # rotation direction into the world frame — confirm against callers.
    euler_zxy = (-theta, 0, 0)
    t_centernew_center[0:3, 0:3] = euler.euler2mat(
        *euler_zxy, axes='szxy')[0:3, 0:3]
    # Compose the translation-only part with the rotation-only part.
    t_centernew_center_tonly = np.eye(4)
    t_centernew_center_tonly[0:3, 3] = -delta_position
    t_centernew_center = t_centernew_center @ t_centernew_center_tonly
    t_world_centernew = t_world_center @ np.linalg.inv(t_centernew_center)
    return t_world_center, t_world_centernew
def get_random_image_transform_params(image_size, theta_sigma=60):
    """Sample random rotation/translation/pivot parameters for augmentation.

    Args:
        image_size: (height, width) of the image in pixels.
        theta_sigma: standard deviation of the rotation angle, in degrees.

    Returns:
        (theta in radians, trans [x, y] in pixels, pivot at the image center).
    """
    theta = np.random.normal(0.0, np.deg2rad(theta_sigma))
    trans = np.random.normal(0.0, np.min(image_size) / 6, size=2)  # [x, y]
    pivot = (image_size[1] / 2, image_size[0] / 2)
    return theta, trans, pivot
def q_mult(q1, q2):
    """Hamilton product of two quaternions in wxyz order, as a tuple."""
    w1, x1, y1, z1 = q1
    w2, x2, y2, z2 = q2
    return (
        w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2,
        w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2,
        w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2,
        w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2,
    )
def perturb(input_image, pixels, theta_sigma=60, add_noise=False):
    """Data augmentation on images.

    Samples a random rigid transform (rejection-sampled until every pixel in
    `pixels` remains in the field of view), warps the image, and optionally
    adds Gaussian noise to the color and depth channels.

    Args:
        input_image: HxWxC array; channels 0-2 are color, 3+ are depth.
        pixels: iterable of (row, col) pixel locations that must survive the
            transform.
        theta_sigma: std-dev of the sampled rotation, in degrees.
        add_noise: if True, perturb color (int, sigma=3) and depth
            (float, sigma=0.003) channels with Gaussian noise.

    Returns:
        (warped image, transformed float pixels, transformed rounded pixels,
         transform parameters as [theta, trans_x, trans_y, pivot_x, pivot_y]).
    """
    image_size = input_image.shape[:2]
    # Compute random rigid transform.
    while True:
        theta, trans, pivot = get_random_image_transform_params(image_size, theta_sigma=theta_sigma)
        transform = get_image_transform(theta, trans, pivot)
        # NOTE(review): this tuple is dead — it is overwritten by the flat
        # array built at the end of the function.
        transform_params = theta, trans, pivot
        # Ensure pixels remain in the image after transform.
        is_valid = True
        new_pixels = []
        new_rounded_pixels = []
        for pixel in pixels:
            # (row, col) -> homogeneous (x, y, 1) column vector.
            pixel = np.float32([pixel[1], pixel[0], 1.]).reshape(3, 1)
            rounded_pixel = np.int32(np.round(transform @ pixel))[:2].squeeze()
            rounded_pixel = np.flip(rounded_pixel)  # back to (row, col)
            pixel = (transform @ pixel)[:2].squeeze()
            pixel = np.flip(pixel)
            in_fov_rounded = rounded_pixel[0] < image_size[0] and rounded_pixel[
                1] < image_size[1]
            in_fov = pixel[0] < image_size[0] and pixel[1] < image_size[1]
            is_valid = is_valid and np.all(rounded_pixel >= 0) and np.all(
                pixel >= 0) and in_fov_rounded and in_fov
            new_pixels.append(pixel)
            new_rounded_pixels.append(rounded_pixel)
        if is_valid:
            break
    # Apply rigid transform to image and pixel labels.
    input_image = cv2.warpAffine(
        input_image,
        transform[:2, :], (image_size[1], image_size[0]),
        flags=cv2.INTER_LINEAR)
    # Apply noise.
    color = np.int32(input_image[:, :, :3])
    depth = np.float32(input_image[:, :, 3:])
    if add_noise:
        color += np.int32(np.random.normal(0, 3, image_size + (3,)))
        color = np.uint8(np.clip(color, 0, 255))
        # NOTE(review): assumes exactly 3 depth channels — confirm callers.
        depth += np.float32(np.random.normal(0, 0.003, image_size + (3,)))
    input_image = np.concatenate((color, depth), axis=2)
    # Flat parameter vector of length 5.
    transform_params = np.array([theta, trans[0], trans[1], pivot[0], pivot[1]])
    return input_image, new_pixels, new_rounded_pixels, transform_params
def apply_perturbation(input_image, transform_params):
    '''Apply data augmentation with specific transform params'''
    height_width = input_image.shape[:2]
    # Unpack the flat [theta, trans_x, trans_y, pivot_x, pivot_y] vector.
    theta = transform_params[0]
    trans = transform_params[1:3]
    pivot = transform_params[3:5]
    matrix = get_image_transform(theta, trans, pivot)
    # Warp with the 2x3 affine part of the homogeneous transform.
    return cv2.warpAffine(
        input_image,
        matrix[:2, :], (height_width[1], height_width[0]),
        flags=cv2.INTER_LINEAR)
class ImageRotator:
    """Rotate a list of image batches through n evenly-spaced angles.

    Reference: https://kornia.readthedocs.io/en/latest/tutorials/warp_affine.html?highlight=rotate
    """

    def __init__(self, n_rotations):
        # Angles in degrees, evenly spaced over [0, 360).
        self.angles = []
        for i in range(n_rotations):
            theta = i * 2 * 180 / n_rotations
            self.angles.append(theta)

    def __call__(self, x_list, pivot, reverse=False):
        """Rotate x_list[i] by self.angles[i] about `pivot`.

        Args:
            x_list: list of [B, C, H, W] tensors, one per rotation angle.
            pivot: rotation center; tensor or sequence. The [1, 0] index
                swap below suggests it arrives in (row, col) order — confirm.
            reverse: if True, rotate by the negated angles instead.

        Returns:
            List of rotated float tensors, same shapes as the inputs.
        """
        rot_x_list = []
        for i, angle in enumerate(self.angles):
            x = x_list[i]
            # Per-sample rotation angle, in degrees, as kornia expects.
            size = len(x)
            alpha = angle if not reverse else (-1.0 * angle)  # in degrees
            angle = torch.ones(size) * alpha
            # Rotation center, flipped into (x, y) order and broadcast to
            # one row per batch element.
            if type(pivot) is not torch.Tensor:
                center = torch.FloatTensor(pivot)[..., [1, 0]]
                center = center.view(1, -1).repeat((size, 1))
            else:
                center = pivot[..., [1, 0]].view(1, -1).clone().to(angle.device)
            # Unit scale factor (no zoom).
            scale = torch.ones(size, 2)
            # Compute the 2x3 affine rotation matrix.
            M = kornia.geometry.get_rotation_matrix2d(center, angle, scale)
            _, _, h, w = x.shape
            # Apply the rotation to the batch.
            x_warped = kornia.geometry.transform.warp_affine(
                x.float(), M.to(x.device), dsize=(h, w))
            x_warped = x_warped
            rot_x_list.append(x_warped)
        return rot_x_list
| # KD Tree Utils | |
| # Construct K-D Tree to roughly estimate how many objects can fit inside the box. | |
class TreeNode:
    """Node of the K-D tree used to partition a bounding box into leaf boxes."""

    def __init__(self, parent, children, bbox):
        # Parent TreeNode, or whatever sentinel the caller supplies for roots.
        self.parent = parent
        # List of child TreeNodes; empty for leaves.
        self.children = children
        self.bbox = bbox  # min x, min y, min z, max x, max y, max z
def KDTree(node, min_object_dim, margin, bboxes):
    """Recursively split `node.bbox` into randomly-placed leaf boxes.

    An axis is splittable while it can still hold two objects of size
    `min_object_dim`; once no axis is splittable the node's bbox is appended
    to `bboxes` as a leaf.
    """
    size = node.bbox[3:] - node.bbox[:3]
    # Which axes are still large enough to split.
    splittable = size > 2 * min_object_dim
    if not np.any(splittable):
        bboxes.append(node.bbox)
        return
    # Pick one splittable axis uniformly at random.
    axis_probs = np.float32(splittable) / np.sum(splittable)
    split_axis = np.random.choice(range(len(axis_probs)), 1, p=axis_probs)[0]
    # Random cut position leaving at least min_object_dim on both sides.
    span = size[split_axis] - 2 * min_object_dim
    cut_ind = np.random.rand() * span + node.bbox[split_axis] + min_object_dim
    # Two children separated by `margin` around the cut.
    child1_bbox = node.bbox.copy()
    child1_bbox[3 + split_axis] = cut_ind - margin / 2.
    child2_bbox = node.bbox.copy()
    child2_bbox[split_axis] = cut_ind + margin / 2.
    node.children = [
        TreeNode(node, [], bbox=child1_bbox),
        TreeNode(node, [], bbox=child2_bbox),
    ]
    KDTree(node.children[0], min_object_dim, margin, bboxes)
    KDTree(node.children[1], min_object_dim, margin, bboxes)
| # ----------------------------------------------------------------------------- | |
| # Shape Name UTILS | |
| # ----------------------------------------------------------------------------- | |
# Google-scanned object names for the "seen objects" setting, split by phase.
# NOTE(review): 'val' and 'test' currently contain identical lists.
google_seen_obj_shapes = {
    'train': [
        'alarm clock',
        'android toy',
        'black boot with leopard print',
        'black fedora',
        'black razer mouse',
        'black sandal',
        'black shoe with orange stripes',
        'bull figure',
        'butterfinger chocolate',
        'c clamp',
        'can opener',
        'crayon box',
        'dog statue',
        'frypan',
        'green and white striped towel',
        'grey soccer shoe with cleats',
        'hard drive',
        'honey dipper',
        'magnifying glass',
        'mario figure',
        'nintendo 3ds',
        'nintendo cartridge',
        'office depot box',
        'orca plush toy',
        'pepsi gold caffeine free box',
        'pepsi wild cherry box',
        'porcelain cup',
        'purple tape',
        'red and white flashlight',
        'rhino figure',
        'rocket racoon figure',
        'scissors',
        'silver tape',
        'spatula with purple head',
        'spiderman figure',
        'tablet',
        'toy school bus',
    ],
    'val': [
        'ball puzzle',
        'black and blue sneakers',
        'black shoe with green stripes',
        'brown fedora',
        'dinosaur figure',
        'hammer',
        'light brown boot with golden laces',
        'lion figure',
        'pepsi max box',
        'pepsi next box',
        'porcelain salad plate',
        'porcelain spoon',
        'red and white striped towel',
        'red cup',
        'screwdriver',
        'toy train',
        'unicorn toy',
        'white razer mouse',
        'yoshi figure'
    ],
    'test': [
        'ball puzzle',
        'black and blue sneakers',
        'black shoe with green stripes',
        'brown fedora',
        'dinosaur figure',
        'hammer',
        'light brown boot with golden laces',
        'lion figure',
        'pepsi max box',
        'pepsi next box',
        'porcelain salad plate',
        'porcelain spoon',
        'red and white striped towel',
        'red cup',
        'screwdriver',
        'toy train',
        'unicorn toy',
        'white razer mouse',
        'yoshi figure'
    ],
}
# Google-scanned object names for the "unseen objects" setting, split by phase.
# NOTE(review): this dict is currently byte-identical to
# google_seen_obj_shapes — verify whether the unseen split was ever filled in.
google_unseen_obj_shapes = {
    'train': [
        'alarm clock',
        'android toy',
        'black boot with leopard print',
        'black fedora',
        'black razer mouse',
        'black sandal',
        'black shoe with orange stripes',
        'bull figure',
        'butterfinger chocolate',
        'c clamp',
        'can opener',
        'crayon box',
        'dog statue',
        'frypan',
        'green and white striped towel',
        'grey soccer shoe with cleats',
        'hard drive',
        'honey dipper',
        'magnifying glass',
        'mario figure',
        'nintendo 3ds',
        'nintendo cartridge',
        'office depot box',
        'orca plush toy',
        'pepsi gold caffeine free box',
        'pepsi wild cherry box',
        'porcelain cup',
        'purple tape',
        'red and white flashlight',
        'rhino figure',
        'rocket racoon figure',
        'scissors',
        'silver tape',
        'spatula with purple head',
        'spiderman figure',
        'tablet',
        'toy school bus',
    ],
    'val': [
        'ball puzzle',
        'black and blue sneakers',
        'black shoe with green stripes',
        'brown fedora',
        'dinosaur figure',
        'hammer',
        'light brown boot with golden laces',
        'lion figure',
        'pepsi max box',
        'pepsi next box',
        'porcelain salad plate',
        'porcelain spoon',
        'red and white striped towel',
        'red cup',
        'screwdriver',
        'toy train',
        'unicorn toy',
        'white razer mouse',
        'yoshi figure'
    ],
    'test': [
        'ball puzzle',
        'black and blue sneakers',
        'black shoe with green stripes',
        'brown fedora',
        'dinosaur figure',
        'hammer',
        'light brown boot with golden laces',
        'lion figure',
        'pepsi max box',
        'pepsi next box',
        'porcelain salad plate',
        'porcelain spoon',
        'red and white striped towel',
        'red cup',
        'screwdriver',
        'toy train',
        'unicorn toy',
        'white razer mouse',
        'yoshi figure'
    ],
}
# Union of all Google-scanned object names; the same full list is used for
# every phase (train/val/test).
google_all_shapes = {
    'train': [
        'alarm clock',
        'android toy',
        'ball puzzle',
        'black and blue sneakers',
        'black boot with leopard print',
        'black fedora',
        'black razer mouse',
        'black sandal',
        'black shoe with green stripes',
        'black shoe with orange stripes',
        'brown fedora',
        'bull figure',
        'butterfinger chocolate',
        'c clamp',
        'can opener',
        'crayon box',
        'dinosaur figure',
        'dog statue',
        'frypan',
        'green and white striped towel',
        'grey soccer shoe with cleats',
        'hammer',
        'hard drive',
        'honey dipper',
        'light brown boot with golden laces',
        'lion figure',
        'magnifying glass',
        'mario figure',
        'nintendo 3ds',
        'nintendo cartridge',
        'office depot box',
        'orca plush toy',
        'pepsi gold caffeine free box',
        'pepsi max box',
        'pepsi next box',
        'pepsi wild cherry box',
        'porcelain cup',
        'porcelain salad plate',
        'porcelain spoon',
        'purple tape',
        'red and white flashlight',
        'red and white striped towel',
        'red cup',
        'rhino figure',
        'rocket racoon figure',
        'scissors',
        'screwdriver',
        'silver tape',
        'spatula with purple head',
        'spiderman figure',
        'tablet',
        'toy school bus',
        'toy train',
        'unicorn toy',
        'white razer mouse',
        'yoshi figure',
    ],
    'val': [
        'alarm clock',
        'android toy',
        'ball puzzle',
        'black and blue sneakers',
        'black boot with leopard print',
        'black fedora',
        'black razer mouse',
        'black sandal',
        'black shoe with green stripes',
        'black shoe with orange stripes',
        'brown fedora',
        'bull figure',
        'butterfinger chocolate',
        'c clamp',
        'can opener',
        'crayon box',
        'dinosaur figure',
        'dog statue',
        'frypan',
        'green and white striped towel',
        'grey soccer shoe with cleats',
        'hammer',
        'hard drive',
        'honey dipper',
        'light brown boot with golden laces',
        'lion figure',
        'magnifying glass',
        'mario figure',
        'nintendo 3ds',
        'nintendo cartridge',
        'office depot box',
        'orca plush toy',
        'pepsi gold caffeine free box',
        'pepsi max box',
        'pepsi next box',
        'pepsi wild cherry box',
        'porcelain cup',
        'porcelain salad plate',
        'porcelain spoon',
        'purple tape',
        'red and white flashlight',
        'red and white striped towel',
        'red cup',
        'rhino figure',
        'rocket racoon figure',
        'scissors',
        'screwdriver',
        'silver tape',
        'spatula with purple head',
        'spiderman figure',
        'tablet',
        'toy school bus',
        'toy train',
        'unicorn toy',
        'white razer mouse',
        'yoshi figure',
    ],
    'test': [
        'alarm clock',
        'android toy',
        'ball puzzle',
        'black and blue sneakers',
        'black boot with leopard print',
        'black fedora',
        'black razer mouse',
        'black sandal',
        'black shoe with green stripes',
        'black shoe with orange stripes',
        'brown fedora',
        'bull figure',
        'butterfinger chocolate',
        'c clamp',
        'can opener',
        'crayon box',
        'dinosaur figure',
        'dog statue',
        'frypan',
        'green and white striped towel',
        'grey soccer shoe with cleats',
        'hammer',
        'hard drive',
        'honey dipper',
        'light brown boot with golden laces',
        'lion figure',
        'magnifying glass',
        'mario figure',
        'nintendo 3ds',
        'nintendo cartridge',
        'office depot box',
        'orca plush toy',
        'pepsi gold caffeine free box',
        'pepsi max box',
        'pepsi next box',
        'pepsi wild cherry box',
        'porcelain cup',
        'porcelain salad plate',
        'porcelain spoon',
        'purple tape',
        'red and white flashlight',
        'red and white striped towel',
        'red cup',
        'rhino figure',
        'rocket racoon figure',
        'scissors',
        'screwdriver',
        'silver tape',
        'spatula with purple head',
        'spiderman figure',
        'tablet',
        'toy school bus',
        'toy train',
        'unicorn toy',
        'white razer mouse',
        'yoshi figure',
    ],
}
# Names of the 20 assembling-kit shapes, keyed by the integer shape id used
# by the task assets (0-19, in asset order).
assembling_kit_shapes = dict(enumerate([
    "letter R shape",
    "letter A shape",
    "triangle",
    "square",
    "plus",
    "letter T shape",
    "diamond",
    "pentagon",
    "rectangle",
    "flower",
    "star",
    "circle",
    "letter G shape",
    "letter V shape",
    "letter E shape",
    "letter L shape",
    "ring",
    "hexagon",
    "heart",
    "letter M shape",
]))
| # ----------------------------------------------------------------------------- | |
| # COLOR AND PLOT UTILS | |
| # ----------------------------------------------------------------------------- | |
| # Colors (Tableau palette). | |
| COLORS = { | |
| 'blue': [78.0 / 255.0, 121.0 / 255.0, 167.0 / 255.0], | |
| 'red': [255.0 / 255.0, 087.0 / 255.0, 089.0 / 255.0], | |
| 'green': [089.0 / 255.0, 169.0 / 255.0, 078.0 / 255.0], | |
| 'orange': [242.0 / 255.0, 142.0 / 255.0, 043.0 / 255.0], | |
| 'yellow': [237.0 / 255.0, 201.0 / 255.0, 072.0 / 255.0], | |
| 'purple': [176.0 / 255.0, 122.0 / 255.0, 161.0 / 255.0], | |
| 'pink': [255.0 / 255.0, 157.0 / 255.0, 167.0 / 255.0], | |
| 'cyan': [118.0 / 255.0, 183.0 / 255.0, 178.0 / 255.0], | |
| 'brown': [156.0 / 255.0, 117.0 / 255.0, 095.0 / 255.0], | |
| 'white': [255.0 / 255.0, 255.0 / 255.0, 255.0 / 255.0], | |
| 'gray': [186.0 / 255.0, 176.0 / 255.0, 172.0 / 255.0], | |
| 'indigo': [75.0 / 255.0, 0.0 / 255.0, 130.0 / 255.0], | |
| 'violet': [143.0 / 255.0, 0.0 / 255.0, 255.0 / 255.0], | |
| 'black': [0.0 / 255.0, 0.0 / 255.0, 0.0 / 255.0], | |
| 'silver': [192.0 / 255.0, 192.0 / 255.0, 192.0 / 255.0], | |
| 'gold': [255.0 / 255.0, 215.0 / 255.0, 0.0 / 255.0], | |
| } | |
| COLORS_NAMES = list(COLORS.keys()) | |
| TRAIN_COLORS = ['blue', 'red', 'green', 'yellow', 'brown', 'gray', 'cyan'] | |
| EVAL_COLORS = ['blue', 'red', 'green', 'orange', 'purple', 'pink', 'white'] | |
def get_colors(mode, n_colors=-1, **kwargs):
    """Return (rgb_lists, names) for the colors available in `mode`.

    Args:
        mode: split name forwarded to get_colors_names.
        n_colors: if -1 (default), return every color for the mode;
            otherwise sample this many names without replacement.
        **kwargs: ignored; accepted for call-site compatibility.

    Returns:
        Tuple of ([r, g, b] float lists, matching color-name strings).
    """
    all_color_names = get_colors_names(mode)
    # The original had a no-op `if n_colors == -1` branch; only the sampling
    # path does any work, so branch on that directly.
    if n_colors != -1:
        all_color_names = random.sample(all_color_names, n_colors)
    return [COLORS[cn] for cn in all_color_names], all_color_names
def get_colors_names(mode):
    """Return the list of color names available for `mode`.

    NOTE(review): the original branched on 'train' / 'full' / everything
    else, but every branch returned TRAIN_COLORS, so the redundant branches
    were collapsed. EVAL_COLORS exists at module level but is never returned
    here -- confirm whether evaluation modes were meant to use it.
    """
    del mode  # All modes currently share the training palette.
    return TRAIN_COLORS
def get_random_color():
    """Sample one training color uniformly.

    Returns:
        Tuple of ([single rgb list], [single color-name string]), as
        produced by get_colors with n_colors=1.
    """
    return get_colors(mode='train', n_colors=1)
def solve_hanoi_all(n_disks):
    """Compute the full Tower-of-Hanoi solution for `n_disks` disks.

    Disks are indexed 0 (smallest) to n_disks - 1 (largest); rods are
    indexed 0 (source), 1 (auxiliary used as spare), 2 (target).

    Args:
        n_disks: number of disks on the source rod (must be >= 1).

    Returns:
        List of [disk_index, from_rod, to_rod] moves, in execution order.
    """
    moves = []

    def _move(disk, src, dst, via):
        # Base case: the smallest disk moves directly.
        if disk == 0:
            moves.append([disk, src, dst])
            return
        # Shift the smaller tower out of the way, move this disk, then
        # bring the smaller tower back on top of it.
        _move(disk - 1, src, via, dst)
        moves.append([disk, src, dst])
        _move(disk - 1, via, dst, src)

    _move(n_disks - 1, 0, 2, 1)
    return moves
def plot(fname,
         title,
         ylabel,
         xlabel,
         data,
         xlim=None,
         xticks=None,
         ylim=None,
         show_std=True):
    """Plot per-experiment curves (mean +/- std band) and save to `fname`.

    Args:
        fname: output image path passed to plt.savefig.
        title: figure title.
        ylabel: y-axis label.
        xlabel: x-axis label.
        data: dict mapping experiment name -> (x, y, y_std), each a size-N
            sequence; one curve is drawn per entry.
        xlim: optional [lo, hi] starting x-limits; grown to fit the data.
            Defaults to [-inf, 0] as before.
        xticks: optional tick labels placed at positions 0..len(xticks)-1.
        ylim: optional [lo, hi] starting y-limits; grown to fit the data.
            Defaults to [inf, -inf] as before.
        show_std: when True, draw a shaded +/- std band per curve.
    """
    # Fix for the old dangerous-default-value lists: they were mutated in
    # place below, so limits leaked across successive plot() calls. Build a
    # fresh accumulator per call and copy caller-provided limits so their
    # lists are not clobbered either.
    xlim = [-np.inf, 0] if xlim is None else list(xlim)
    ylim = [np.inf, -np.inf] if ylim is None else list(ylim)

    # Grow limits to cover the data.
    # NOTE(review): xlim[0] takes the max() of the per-series minima (not
    # min), matching the original -- presumably to clip to the common
    # x-range across experiments; confirm that intent.
    for name, (x, y, _) in data.items():
        del name
        y = np.array(y)
        xlim[0] = max(xlim[0], np.min(x))
        xlim[1] = max(xlim[1], np.max(x))
        ylim[0] = min(ylim[0], np.min(y))
        ylim[1] = max(ylim[1], np.max(y))

    # Draw background.
    plt.title(title, fontsize=14)
    plt.ylim(ylim)
    plt.ylabel(ylabel, fontsize=14)
    plt.yticks(fontsize=14)
    plt.xlim(xlim)
    plt.xlabel(xlabel, fontsize=14)
    plt.grid(True, linestyle='-', color=[0.8, 0.8, 0.8])
    ax = plt.gca()
    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_color('#000000')
    plt.rcParams.update({'font.size': 14})
    plt.rcParams['mathtext.default'] = 'regular'
    # Fonttype 42 embeds TrueType fonts in PDF/PS output.
    matplotlib.rcParams['pdf.fonttype'] = 42
    matplotlib.rcParams['ps.fonttype'] = 42

    # Draw data, cycling through the module palette in insertion order.
    color_iter = 0
    for name, (x, y, std) in data.items():
        del name
        x, y, std = np.float32(x), np.float32(y), np.float32(std)
        upper = np.clip(y + std, ylim[0], ylim[1])
        lower = np.clip(y - std, ylim[0], ylim[1])
        color = COLORS[list(COLORS.keys())[color_iter]]
        if show_std:
            plt.fill_between(x, upper, lower, color=color, linewidth=0, alpha=0.3)
        plt.plot(x, y, color=color, linewidth=2, marker='o', alpha=1.)
        color_iter += 1
    if xticks:
        plt.xticks(ticks=range(len(xticks)), labels=xticks, fontsize=14)
    else:
        plt.xticks(fontsize=14)
    plt.legend([name for name, _ in data.items()],
               loc='lower right', fontsize=14)
    plt.tight_layout()
    plt.savefig(fname)
    plt.clf()
| # ----------------------------------------------------------------------------- | |
| # MESHCAT UTILS | |
| # ----------------------------------------------------------------------------- | |
def create_visualizer(clear=True, zmq_url='tcp://127.0.0.1:6000'):
    """Connect to an already-running meshcat server.

    Args:
        clear: when True, delete any geometry left in the scene from a
            previous session.
        zmq_url: ZMQ address of the meshcat server. Defaults to the
            previously hard-coded local address, so existing callers are
            unaffected.

    Returns:
        A connected meshcat.Visualizer.
    """
    print('Waiting for meshcat server... have you started a server?')
    vis = meshcat.Visualizer(zmq_url=zmq_url)
    if clear:
        vis.delete()
    return vis
def make_frame(vis, name, h, radius, o=1.0):
    """Add a red-green-blue triad to the Meshcat visualizer.

    Args:
        vis (MeshCat Visualizer): the visualizer.
        name (string): name for this frame (should be unique).
        h (float): height of frame visualization.
        radius (float): radius of frame visualization.
        o (float): opacity.
    """
    # The original repeated the same stanza three times; fold it into one
    # data-driven loop. Each entry: (axis label, cylinder color, rotation
    # axis passed to mtf.rotation_matrix, row of the transform that gets
    # the h/2 translation) -- values copied verbatim from the original.
    triad = [
        ('x', 0xff0000, [0, 0, 1], 0),
        ('y', 0x00ff00, [0, 1, 0], 1),
        ('z', 0x0000ff, [1, 0, 0], 2),
    ]
    for axis_label, color, rot_axis, trans_row in triad:
        vis[name][axis_label].set_object(
            g.Cylinder(height=h, radius=radius),
            g.MeshLambertMaterial(color=color, reflectivity=0.8, opacity=o))
        tf = mtf.rotation_matrix(np.pi / 2.0, rot_axis)
        tf[trans_row, 3] = h / 2
        vis[name][axis_label].set_transform(tf)
def meshcat_visualize(vis, obs, act, info):
    """Visualize data using meshcat.

    Draws one coordinate triad per object pose in `info`, then one colored
    point cloud per camera in `act['camera_config']`.

    Args:
        vis: meshcat Visualizer handle.
        obs: observation dict; reads obs['depth'][i] (per-camera depth image)
            and obs['color'][i] (per-camera color image, values 0-255).
        act: action dict; reads act['camera_config'], a list of per-camera
            dicts with 'intrinsics' (9 values reshaped to 3x3), 'position'
            (xyz), and 'rotation' (xyzw quaternion).
        info: dict mapping object keys to (position, xyzw-quaternion) poses.
    """
    # Draw a small frame at every object pose.
    for key in sorted(info.keys()):
        pose = info[key]
        pick_transform = np.eye(4)
        pick_transform[0:3, 3] = pose[0]
        # Poses arrive as xyzw; mtf.quaternion_matrix expects wxyz.
        quaternion_wxyz = np.asarray(
            [pose[1][3], pose[1][0], pose[1][1], pose[1][2]])
        pick_transform[0:3, 0:3] = mtf.quaternion_matrix(quaternion_wxyz)[0:3, 0:3]
        label = 'obj_' + str(key)
        make_frame(vis, label, h=0.05, radius=0.0012, o=1.0)
        vis[label].set_transform(pick_transform)
    # Back-project each camera's depth map into a world-frame point cloud.
    for cam_index in range(len(act['camera_config'])):
        # NOTE(review): np.array([0, 1]) and np.zeros(5) are passed as the
        # 2nd and 4th arguments of unproject_depth_vectorized (defined
        # elsewhere in this file) -- presumably a depth range and zero
        # distortion coefficients; confirm against its definition.
        verts = unproject_depth_vectorized(
            obs['depth'][cam_index], np.array([0, 1]),
            np.array(act['camera_config'][cam_index]['intrinsics']).reshape(3, 3),
            np.zeros(5))
        # switch from [N,3] to [3,N]
        verts = verts.T
        # Camera extrinsics -> 4x4 world-from-camera transform.
        cam_transform = np.eye(4)
        cam_transform[0:3, 3] = act['camera_config'][cam_index]['position']
        quaternion_xyzw = act['camera_config'][cam_index]['rotation']
        # Same xyzw -> wxyz reordering as above.
        quaternion_wxyz = np.asarray([
            quaternion_xyzw[3], quaternion_xyzw[0], quaternion_xyzw[1],
            quaternion_xyzw[2]
        ])
        cam_transform[0:3, 0:3] = mtf.quaternion_matrix(quaternion_wxyz)[0:3, 0:3]
        verts = apply_transform(cam_transform, verts)
        # Colors normalized to [0, 1] and transposed to match verts' [3,N].
        colors = obs['color'][cam_index].reshape(-1, 3).T / 255.0
        vis['pointclouds/' + str(cam_index)].set_object(
            g.PointCloud(position=verts, color=colors))
| # ----------------------------------------------------------------------------- | |
| # CONFIG UTILS | |
| # ----------------------------------------------------------------------------- | |
def set_seed(seed, torch=False):
    """Seed Python, NumPy, and (optionally) PyTorch RNGs for reproducibility.

    Args:
        seed: integer seed applied to every generator.
        torch: when True, also seed PyTorch. NOTE(review): this boolean
            parameter shadows the `torch` module name; it is kept as-is for
            backward compatibility with existing callers.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    if torch:
        # Import under an alias so the flag parameter is not rebound by the
        # import statement (the original re-imported into the same name).
        import torch as _torch
        _torch.manual_seed(seed)
def load_cfg(yaml_path):
    """Load a YAML config file and return its parsed contents."""
    with open(yaml_path, 'r') as cfg_file:
        return yaml.safe_load(cfg_file)
def load_hydra_config(config_path):
    """Load a Hydra/OmegaConf config from `config_path`."""
    cfg = OmegaConf.load(config_path)
    return cfg