"""Point-cloud based exploration agent that plans over a grid graph."""

import random

import cv2
import networkx as nx
import numpy as np
import open3d as o3d
from tqdm import tqdm, trange

from util import depth_to_world_coordinates


class ExplorationAgent:
    """Accumulate observed point clouds and plan exploration moves.

    The agent keeps several point clouds (everything seen, objects,
    receptacles, task-relevant targets, floor), derives a set of navigable
    integer grid cells from the floor cloud and maintains a directed graph
    whose nodes are ``(x, z, rotation, horizon)`` waypoints. ``get_plan``
    samples a goal cell and returns the action sequence leading to it.
    """

    # Discrete headings / camera horizons used for graph nodes.
    ROTATIONS = (0, 90, 180, 270)
    HORIZONS = tuple(range(-30, 60 + 1, 15))
    # Grid step of a forward move for the matching entry of ROTATIONS.
    DIRECTIONS = ((0, 1), (1, 0), (0, -1), (-1, 0))

    # Points whose third coordinate is below FLOOR_HEIGHT are treated as
    # floor and flattened to FLOOR_Z.
    # NOTE(review): units/frame come from depth_to_world_coordinates -- confirm.
    FLOOR_HEIGHT = -6
    FLOOR_Z = -6.1

    # Maximum number of goal candidates tried by get_plan().
    MAX_PLAN_ATTEMPTS = 50

    def __init__(self, map_size=100):
        """Create an empty agent.

        Args:
            map_size: Side length of the grid; half of it is used as the
                offset added to the agent's grid coordinates.
        """
        # navigation (grid) graph
        self.graph = nx.DiGraph()
        self.current_waypoint = (0, 0, 0, 0)

        self.map_size = map_size
        self.offset_x = self.map_size // 2
        self.offset_z = self.map_size // 2

        self.masks = {}
        self.num_erosion = 1

        # Accumulated point clouds (None until the first update_memory call).
        self.points = None
        self.points_objects = None
        self.points_receps = None
        self.points_floor = None
        self.points_waypoints = None
        self.points_quantized = None

        # Open3D marker of the most recently planned goal.
        self.target = None

        self.points_object_target = None
        self.points_toggle_target = None
        self.points_object_sliced = None
        self.points_mrecep_target = None
        self.points_parent_target = None
        self.points_task_specific = None  # Cool, Heat, Clean

        # [x, z] cells reported unnavigable / already visited.
        self.points_removed = []
        self.points_visited = []

    @property
    def current_waypoint_str(self):
        """Return the current waypoint formatted as ``'x|z|rotation|horizon'``."""
        return '{}|{}|{}|{}'.format(*list(self.current_waypoint))

    def remove_node(self, node):
        """Remove every waypoint of a grid cell and mark the cell unnavigable.

        Args:
            node: Sequence whose first two items are the cell's x and z.
        """
        for rotation in self.ROTATIONS:
            for horizon in self.HORIZONS:
                n = (node[0], node[1], rotation, horizon)
                if n in self.graph:
                    self.graph.remove_node(n)
        self.points_removed.append([node[0], node[1]])

    def set_current_position(self, event):
        """Update ``current_waypoint`` from a simulator event.

        Reads ``event.metadata['agent']`` (position, rotation, cameraHorizon),
        converts the position to grid coordinates (position / 0.25 plus the
        map offset), snaps rotation and horizon to the nearest discrete value
        when within 5 degrees, and records the cell as visited.
        """
        agent = event.metadata['agent']
        x = int(np.round(agent['position']['x'] / 0.25)) + self.offset_x
        z = int(np.round(agent['position']['z'] / 0.25)) + self.offset_z

        rotation = int((agent['rotation']['y'] + 360) % 360)
        for r in self.ROTATIONS:
            # Circular distance, so that e.g. 358 degrees snaps to 0.
            diff = abs(rotation - r)
            if min(diff, 360 - diff) < 5:
                rotation = r
                break

        horizon = agent['cameraHorizon']
        for h in self.HORIZONS:
            if abs(horizon - h) < 5:
                horizon = h
                break

        self.current_waypoint = (x, z, rotation, horizon)

        if [x, z] not in self.points_visited:
            self.points_visited.append([x, z])

    def update_memory(
        self,
        depth_original,
        metadata,
        masks_objects,
        masks_receps,
        conf,
        object_mask,
        receps_mask,
        object_target_mask,
        toggle_target_mask,
        object_sliced_mask,
        mrecep_target_mask,
        parent_target_mask,
        task_specific_mask,
    ):
        """Fuse a new depth observation into the point clouds and the graph.

        Args:
            depth_original: Depth map of the current view.
            metadata: Unused; kept for interface compatibility.
            masks_objects: Unused; kept for interface compatibility.
            masks_receps: Unused; kept for interface compatibility.
            conf: Per-pixel confidence, same shape as ``depth_original``;
                pixels below a per-cloud threshold are discarded.
            object_mask, receps_mask, object_target_mask, toggle_target_mask,
            object_sliced_mask, mrecep_target_mask, parent_target_mask,
            task_specific_mask: Per-pixel masks multiplied into the depth map
                to select the pixels of each semantic cloud.
        """
        # (attribute, mask, minimum confidence, keep only points above floor)
        clouds = (
            ('points', None, 0.7, False),
            ('points_objects', object_mask, 0.1, True),
            ('points_receps', receps_mask, 0.8, True),
            ('points_object_target', object_target_mask, 0.3, True),
            ('points_toggle_target', toggle_target_mask, 0.3, True),
            ('points_object_sliced', object_sliced_mask, 0.3, True),
            ('points_mrecep_target', mrecep_target_mask, 0.8, True),
            ('points_parent_target', parent_target_mask, 0.8, True),
            ('points_task_specific', task_specific_mask, 0.1, True),
        )

        # Project everything first so a failure leaves the memory untouched.
        projected = {
            attr: self._project(depth_original, conf, min_conf, mask)
            for attr, mask, min_conf, _ in clouds
        }
        for attr, _, _, above_floor in clouds:
            merged = self._merge(getattr(self, attr), projected[attr], above_floor)
            setattr(self, attr, merged)

        self._update_navigable_cells()
        self._update_graph()

    def _project(self, depth_original, conf, min_conf, mask=None):
        """Return the (N, 3) points of one observation for a single cloud."""
        depth = depth_original.copy()
        if mask is not None:
            depth = depth * mask
        depth[conf < min_conf] = 0

        # NOTE(review): meaning of the trailing 0 argument is defined by
        # util.depth_to_world_coordinates -- confirm.
        points = depth_to_world_coordinates(
            depth, self.current_waypoint, 0
        ).reshape(-1, 3)

        # remove agent's pose pcd: drop points that coincide (at 0.1
        # resolution) with the agent cell at height 0.
        pose = np.array(
            [[self.current_waypoint[0], self.current_waypoint[1], 0]], dtype=int
        ) * 10
        keep = ((points * 10).astype(int) != pose).any(axis=1)
        return points[keep]

    def _merge(self, existing, new, above_floor):
        """Append ``new`` to ``existing``, round to 0.1 and de-duplicate."""
        merged = new if existing is None else np.vstack((existing, new))
        merged = np.unique(np.round(merged, 1), axis=0)
        if above_floor:
            merged = merged[merged[:, 2] >= self.FLOOR_HEIGHT]
        return merged

    def _update_navigable_cells(self):
        """Recompute ``points_floor`` and ``points_quantized``."""
        floor = self.points[self.points[:, 2] < self.FLOOR_HEIGHT]  # a copy
        floor[:, 2] = self.FLOOR_Z
        self.points_floor = np.unique(floor, axis=0)

        # Only the second coordinate is required to lie on the integer grid.
        on_grid = np.abs(self.points_floor[:, 1] % 1) < 0.1
        candidates = np.round(self.points_floor[on_grid]).astype(int)

        removed = {(p[0], p[1]) for p in self.points_removed}
        verdicts = {}  # many candidates round to the same cell
        navigable = []
        for candidate in candidates:
            cell = (int(candidate[0]), int(candidate[1]))
            if cell not in verdicts:
                verdicts[cell] = self._is_navigable(cell, removed)
            if verdicts[cell]:
                navigable.append(candidate)

        # Visited cells are always kept navigable.
        for point in self.points_visited:
            navigable.append([point[0], point[1], self.FLOOR_Z])

        # reshape keeps the array two-dimensional even when it is empty.
        self.points_quantized = np.array(navigable, dtype=int).reshape(-1, 3)

    def _is_navigable(self, cell, removed):
        """Return True if the grid cell ``(x, z)`` may be walked on."""
        # kill unnavigable points
        if cell in removed:
            return False

        x, z = cell

        # kill navigable space on receptacle points (Manhattan distance < 1)
        if len(self.points_receps) > 0:
            distances = np.abs(
                self.points_receps[:, :2] - np.array([x, z])
            ).sum(axis=1)
            if distances.min() < 1:
                return False

        # kill boundary navigable space: require enough observed floor
        # points in the surrounding 2x2 window.
        near = (np.abs(self.points_floor[:, 0] - x) < 1) & \
               (np.abs(self.points_floor[:, 1] - z) < 1)
        return np.count_nonzero(near) / (20 * 20) > 0.6

    def _update_graph(self):
        """Add nodes and edges for every cell in ``points_quantized``."""
        cells = {(int(x), int(z)) for x, z in self.points_quantized[:, :2]}

        for x, z in sorted(cells):
            for rotation, (dx, dz) in zip(self.ROTATIONS, self.DIRECTIONS):
                for horizon in self.HORIZONS:
                    node = (x, z, rotation, horizon)
                    self.graph.add_node(node, weight=1)

                    # Rotation / elevation connectivity
                    self.graph.add_edge(
                        node, (x, z, (rotation + 90) % 360, horizon), weight=1
                    )
                    self.graph.add_edge(
                        node, (x, z, (rotation + 270) % 360, horizon), weight=1
                    )
                    if horizon != 60:
                        self.graph.add_edge(
                            node, (x, z, rotation, horizon + 15), weight=1
                        )
                    if horizon != -30:
                        self.graph.add_edge(
                            node, (x, z, rotation, horizon - 15), weight=1
                        )

                    # Transition connectivity
                    if (x + dx, z + dz) in cells:
                        self.graph.add_edge(
                            node, (x + dx, z + dz, rotation, horizon), weight=1
                        )

    def get_plan(self):
        """Sample an exploration goal and plan a path to it.

        Returns:
            ``(actions, nodes)`` where ``actions`` is the list of action
            strings and ``nodes`` the waypoints after the start, or
            ``(None, None)`` when no goal could be sampled or reached within
            ``MAX_PLAN_ATTEMPTS`` attempts.
        """
        start = self.current_waypoint

        # The distribution does not change between attempts; build it once.
        # Deliberately best-effort: any failure here means "no plan".
        try:
            probs = self._sampling_probabilities()
        except Exception as e:
            print(e)
            return None, None
        if probs is None:
            return None, None

        for _ in range(self.MAX_PLAN_ATTEMPTS):
            index = np.random.choice(len(probs), 1, p=probs)[0]
            p = self.points_quantized[index]
            if p[0] == start[0] and p[1] == start[1]:
                # Counted as an attempt so the loop is guaranteed to end.
                continue

            target = (p[0], p[1], start[2], start[3])
            if target not in self.graph:
                print(target, 'is not in self.graph')
                continue

            try:
                nodes = nx.astar_path(self.graph, start, target)
                self.target = o3d.geometry.TriangleMesh.create_sphere(0.6)
                self.target.translate([target[0], target[1], self.FLOOR_Z])
                self.target.paint_uniform_color([1, 0, 1])
                actions = self.nodes_to_actions(nodes)
            except Exception as e:
                # Unreachable goal (or marker failure): try another goal.
                print(e)
                continue
            return actions, nodes[1:]

        # attempt budget exhausted
        return None, None

    def _sampling_probabilities(self):
        """Return the goal-sampling distribution over ``points_quantized``.

        Returns None when there is no candidate or every candidate has
        already been visited.
        """
        cells = self.points_quantized[:, :2]
        if len(cells) == 0:
            return None

        probs = np.zeros(len(cells), dtype=float)

        # - advantage for points near receptacles
        if len(self.points_receps) > 0:
            probs += self._closeness_weights(
                self._min_distances(cells, self.points_receps[:, :2])
            )

        # - penalty for points near visited points (weight grows with the
        #   distance to the closest visited cell)
        if self.points_visited:
            visited_xy = np.array(self.points_visited)[:, :2]
            farness = self._normalize(self._min_distances(cells, visited_xy))
            probs += self._normalize(farness ** 4) * 10

        # - advantage for points near task-relevant objects/receptacles
        points_task = np.vstack((
            self.points_object_target,
            self.points_toggle_target,
            self.points_object_sliced,
            self.points_mrecep_target,
            self.points_parent_target,
            self.points_task_specific,
        ))
        if len(points_task) > 0:
            probs += self._closeness_weights(
                self._min_distances(cells, points_task[:, :2])
            )

        # kill visited points
        visited = {(p[0], p[1]) for p in self.points_visited}
        for i, (x, z) in enumerate(cells):
            if (int(x), int(z)) in visited:
                probs[i] = 0

        total = probs.sum()
        if not np.isfinite(total) or total <= 0:
            return None
        return probs / total

    @staticmethod
    def _min_distances(cells, targets):
        """Return, per cell, the Euclidean distance to the closest target."""
        # Looped per cell on purpose: a full pairwise matrix can be very
        # large when ``targets`` is a dense point cloud.
        return np.array(
            [np.linalg.norm(targets - cell, axis=1).min() for cell in cells],
            dtype=float,
        )

    @staticmethod
    def _normalize(weights):
        """Scale ``weights`` to sum to one; uniform if that is impossible."""
        weights = np.asarray(weights, dtype=float)
        total = weights.sum()
        if weights.size == 0 or not np.isfinite(total) or total <= 0:
            return np.full(weights.shape, 1.0 / max(weights.size, 1))
        return weights / total

    def _closeness_weights(self, distances):
        """Turn distances into weights that favour small distances."""
        share = self._normalize(distances)
        return self._normalize((share.max() - share) ** 4)

    def get_distance_transform(self, img):
        """Return the Otsu-thresholded L2 distance transform of ``img``.

        The result is normalised with ``cv2.NORM_MINMAX`` to 8 bits and then
        rescaled to float32 values in ``[0, 1]``.
        """
        ret, thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        dist_transform = cv2.distanceTransform(thresh, cv2.DIST_L2, 5)
        result = cv2.normalize(dist_transform, None, 255, 0, cv2.NORM_MINMAX, cv2.CV_8UC1)
        result = result.astype(np.float32) / 255
        return result

    # Input : [N, 4] nodes
    # Output: action sequence
    def nodes_to_actions(self, nodes):
        """Translate consecutive ``(x, z, rotation, horizon)`` nodes to actions.

        Args:
            nodes: Path of waypoints, e.g. as returned by ``nx.astar_path``.

        Returns:
            List with one action string per recognised step. A step that
            matches no known transition is reported on stdout and skipped.
        """
        actions = []

        for prev, curr in zip(nodes, nodes[1:]):
            # MoveAhead: heading and horizon unchanged
            if prev[2] == curr[2] and prev[3] == curr[3]:
                actions.append('MoveAhead_25')
            # RotateRight
            elif (prev[2] + 90) % 360 == curr[2]:
                actions.append('RotateRight_90')
            # RotateLeft
            elif (prev[2] + 270) % 360 == curr[2]:
                actions.append('RotateLeft_90')
            # LookUp
            elif max(-30, prev[3] - 15) == curr[3]:
                actions.append('LookUp_15')
            # LookDown
            elif min(60, prev[3] + 15) == curr[3]:
                actions.append('LookDown_15')
            # this should not happen
            else:
                print('Unexpected action decoding')

        return actions