|
|
import open3d as o3d |
|
|
import numpy as np |
|
|
import os |
|
|
import cv2 |
|
|
import collections |
|
|
from evaluation.constants import SCANNETPP_LABELS, SCANNETPP_IDS |
|
|
import torch |
|
|
|
|
|
# Plain COLMAP record types; subclassed below to add convenience methods.
BaseImage = collections.namedtuple(
    "Image",
    ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"],
)
BaseCamera = collections.namedtuple(
    "Camera",
    ["id", "model", "width", "height", "params"],
)
|
|
|
|
|
|
|
|
def qvec2rotmat(qvec):
    """Convert a quaternion to a 3x3 rotation matrix.

    The quaternion is given in COLMAP's (w, x, y, z) scalar-first order;
    this is the standard Hamilton-convention expansion.
    """
    w, x, y, z = qvec[0], qvec[1], qvec[2], qvec[3]
    row0 = [1 - 2 * y ** 2 - 2 * z ** 2,
            2 * x * y - 2 * w * z,
            2 * z * x + 2 * w * y]
    row1 = [2 * x * y + 2 * w * z,
            1 - 2 * x ** 2 - 2 * z ** 2,
            2 * y * z - 2 * w * x]
    row2 = [2 * z * x - 2 * w * y,
            2 * y * z + 2 * w * x,
            1 - 2 * x ** 2 - 2 * y ** 2]
    return np.array([row0, row1, row2])
|
|
|
|
|
|
|
|
class Image(BaseImage):
    """COLMAP image record extended with pose helpers (qvec is w-first)."""

    def qvec2rotmat(self):
        """Return this image's orientation as a 3x3 rotation matrix."""
        return qvec2rotmat(self.qvec)

    @property
    def world_to_camera(self) -> np.ndarray:
        """4x4 homogeneous world-to-camera transform built from qvec/tvec."""
        pose = np.eye(4)
        pose[:3, :3] = qvec2rotmat(self.qvec)
        pose[:3, 3] = self.tvec
        return pose
|
|
|
|
|
|
|
|
class Camera(BaseCamera):
    """COLMAP camera record; `K` assembles the 3x3 intrinsic matrix."""

    @property
    def K(self):
        """Return the 3x3 intrinsic matrix for this camera model.

        COLMAP stores a single shared focal length for the SIMPLE_*/RADIAL*
        models (params = f, cx, cy, ...) and separate fx, fy for the others
        (params = fx, fy, cx, cy, ...); distortion parameters are ignored.

        Raises:
            NotImplementedError: if the camera model is not handled here.
        """
        K = np.eye(3)
        if self.model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL",
                          "SIMPLE_RADIAL_FISHEYE", "RADIAL_FISHEYE"):
            # One focal length shared by both axes.
            K[0, 0] = K[1, 1] = self.params[0]
            K[0, 2] = self.params[1]
            K[1, 2] = self.params[2]
        elif self.model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE",
                            "FULL_OPENCV", "FOV", "THIN_PRISM_FISHEYE"):
            K[0, 0] = self.params[0]
            K[1, 1] = self.params[1]
            K[0, 2] = self.params[2]
            K[1, 2] = self.params[3]
        else:
            raise NotImplementedError(f"Unsupported camera model: {self.model}")
        return K
|
|
|
|
|
|
|
|
def read_images_text(path):
    """Parse a COLMAP images.txt file into a dict mapping image id -> Image.

    Each (non-comment) record spans two lines: a header line holding
    IMAGE_ID, QW QX QY QZ, TX TY TZ, CAMERA_ID, NAME, followed by one line
    of 2D observations as repeated (X, Y, POINT3D_ID) triples.
    """
    images = {}
    with open(path, "r") as fid:
        # iter(readline, "") terminates at EOF; we cannot iterate the file
        # object directly because each record consumes an extra readline.
        for raw in iter(fid.readline, ""):
            header = raw.strip()
            if not header or header.startswith("#"):
                continue
            fields = header.split()
            image_id = int(fields[0])
            qvec = np.array(tuple(map(float, fields[1:5])))
            tvec = np.array(tuple(map(float, fields[5:8])))
            camera_id = int(fields[8])
            image_name = fields[9]
            # Second line of the record: interleaved x, y, point3D_id triples.
            obs = fid.readline().split()
            xys = np.column_stack([tuple(map(float, obs[0::3])),
                                   tuple(map(float, obs[1::3]))])
            point3D_ids = np.array(tuple(map(int, obs[2::3])))
            images[image_id] = Image(
                id=image_id, qvec=qvec, tvec=tvec,
                camera_id=camera_id, name=image_name,
                xys=xys, point3D_ids=point3D_ids)
    return images
|
|
|
|
|
|
|
|
def read_cameras_text(path):
    """Parse a COLMAP cameras.txt file into a dict mapping camera id -> Camera.

    See COLMAP src/base/reconstruction.cc:
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)
    """
    cameras = {}
    with open(path, "r") as fid:
        for raw in fid:
            entry = raw.strip()
            # Skip blank lines and '#' comment lines.
            if not entry or entry.startswith("#"):
                continue
            fields = entry.split()
            camera_id = int(fields[0])
            cameras[camera_id] = Camera(
                id=camera_id,
                model=fields[1],
                width=int(fields[2]),
                height=int(fields[3]),
                # Everything after height is the model-specific param vector.
                params=np.array(tuple(map(float, fields[4:]))),
            )
    return cameras
|
|
|
|
|
|
|
|
class ScanNetPPDataset:
    """Per-sequence loader for ScanNet++ iPhone data.

    Parses the sequence's COLMAP camera/pose text files once at
    construction, then serves per-frame RGB images, rendered depth maps,
    segmentation masks, intrinsics and camera-to-world extrinsics.
    """

    def __init__(self, seq_name) -> None:
        self.seq_name = seq_name
        self.root = f'./data/scannetpp/data/{seq_name}'
        self.rgb_dir = f'{self.root}/iphone/rgb'
        self.depth_dir = f'{self.root}/iphone/render_depth'
        self.segmentation_dir = f'{self.root}/output/mask'
        self.object_dict_dir = f'{self.root}/output/object'
        self.point_cloud_path = f'./data/scannetpp/pcld_0.25/{seq_name}.pth'
        self.load_meta_data()

        # Depth PNGs store integer depth; dividing by this yields meters
        # (assumes millimeter encoding -- TODO confirm against renderer).
        self.depth_scale = 1000.0
        # (width, height) used when building Open3D intrinsics.
        self.image_size = (1920, 1440)

    def load_meta_data(self):
        """Load COLMAP intrinsics and per-frame camera-to-world poses."""
        self.frame_id_list = []

        cameras = read_cameras_text(os.path.join(self.root, 'iphone/colmap', "cameras.txt"))
        images = read_images_text(os.path.join(self.root, 'iphone/colmap', "images.txt"))

        # A single camera is shared by all frames; assumes params begin with
        # fx, fy, cx, cy (PINHOLE/OPENCV-style models) -- TODO confirm.
        camera = next(iter(cameras.values()))
        fx, fy, cx, cy = camera.params[:4]

        intrinsics = {}
        extrinsics = {}
        for image in images.values():
            # Image names look like 'frame_000123.jpg' -> frame id 123.
            image_id = int(image.name.split('.')[0].split('_')[1])
            self.frame_id_list.append(image_id)
            # COLMAP stores world-to-camera; invert to get camera-to-world.
            extrinsics[image_id] = np.linalg.inv(image.world_to_camera)
            intrinsics[image_id] = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])

        self.extrinsics = extrinsics
        self.intrinsics = intrinsics

    def get_frame_list(self, stride):
        """Return every `stride`-th frame id (in COLMAP file order)."""
        return self.frame_id_list[::stride]

    def get_intrinsics(self, frame_id):
        """Return the frame's intrinsics as an Open3D PinholeCameraIntrinsic."""
        intrinsic_matrix = self.intrinsics[frame_id]

        intrinsic_cam_parameters = o3d.camera.PinholeCameraIntrinsic()
        intrinsic_cam_parameters.set_intrinsics(
            self.image_size[0], self.image_size[1],
            intrinsic_matrix[0, 0], intrinsic_matrix[1, 1],
            intrinsic_matrix[0, 2], intrinsic_matrix[1, 2])
        return intrinsic_cam_parameters

    def get_extrinsic(self, frame_id):
        """Return the 4x4 camera-to-world pose for `frame_id`."""
        return self.extrinsics[frame_id]

    def get_depth(self, frame_id):
        """Return the rendered depth map for `frame_id` in meters (float32)."""
        depth_path = os.path.join(self.depth_dir, 'frame_%06d.png' % frame_id)
        depth = cv2.imread(depth_path, -1)  # -1: preserve 16-bit values
        depth = depth / self.depth_scale
        return depth.astype(np.float32)

    def get_rgb(self, frame_id, change_color=True):
        """Return the RGB frame; converts BGR->RGB unless change_color=False."""
        rgb_path = os.path.join(self.rgb_dir, 'frame_%06d.jpg' % frame_id)
        rgb = cv2.imread(rgb_path)
        if change_color:
            rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
        return rgb

    def get_segmentation(self, frame_id, align_with_depth=False):
        """Return the segmentation mask for `frame_id`.

        Raises:
            FileNotFoundError: if no mask image exists for this frame.
        """
        segmentation_path = os.path.join(self.segmentation_dir, 'frame_%06d.png' % frame_id)
        # Explicit raise instead of `assert` so the check survives `python -O`.
        if not os.path.exists(segmentation_path):
            raise FileNotFoundError(f"Segmentation not found: {segmentation_path}")
        return cv2.imread(segmentation_path, cv2.IMREAD_UNCHANGED)

    def get_frame_path(self, frame_id):
        """Return (rgb_path, segmentation_path) for `frame_id`."""
        rgb_path = os.path.join(self.rgb_dir, 'frame_%06d.jpg' % frame_id)
        segmentation_path = os.path.join(self.segmentation_dir, 'frame_%06d.png' % frame_id)
        return rgb_path, segmentation_path

    def get_label_features(self):
        """Load the precomputed text-embedding dict for ScanNet++ labels."""
        label_features_dict = np.load('data/text_features/scannetpp.npy', allow_pickle=True).item()
        return label_features_dict

    def get_scene_points(self):
        """Return the downsampled scene point coordinates from the .pth file."""
        data = torch.load(self.point_cloud_path)
        return np.asarray(data['sampled_coords'])

    def get_label_id(self):
        """Build and return the (label2id, id2label) mappings for ScanNet++."""
        self.class_id = SCANNETPP_IDS
        self.class_label = SCANNETPP_LABELS

        self.label2id = {}
        self.id2label = {}
        for label, label_id in zip(self.class_label, self.class_id):
            self.label2id[label] = label_id
            self.id2label[label_id] = label

        return self.label2id, self.id2label