Spaces:

bulatko
/

zoo3d

Paused

App Files Files Community

zoo3d / MaskClustering /dataset /scannetpp.py

bulatko

adding real MK

55e58d1 4 days ago

raw

history blame

7.83 kB

	import open3d as o3d
	import numpy as np
	import os
	import cv2
	import collections
	from evaluation.constants import SCANNETPP_LABELS, SCANNETPP_IDS
	import torch

	# Plain record types filled in by read_images_text / read_cameras_text.
	# The Image and Camera classes in this module subclass them to add helpers.
	BaseImage = collections.namedtuple(
	    "Image", "id qvec tvec camera_id name xys point3D_ids"
	)
	BaseCamera = collections.namedtuple(
	    "Camera", "id model width height params"
	)


	def qvec2rotmat(qvec):
	    """Convert a quaternion into the equivalent 3x3 rotation matrix.

	    Args:
	        qvec: Indexable of four numbers read as (w, x, y, z), i.e. the
	            scalar part comes first. The formula is the one for unit
	            quaternions; no normalisation is performed here.

	    Returns:
	        A ``(3, 3)`` ``np.ndarray`` holding the rotation matrix.
	    """
	    w, x, y, z = qvec[0], qvec[1], qvec[2], qvec[3]
	    return np.array([
	        [1 - 2 * y**2 - 2 * z**2,
	         2 * x * y - 2 * w * z,
	         2 * z * x + 2 * w * y],
	        [2 * x * y + 2 * w * z,
	         1 - 2 * x**2 - 2 * z**2,
	         2 * y * z - 2 * w * x],
	        [2 * z * x - 2 * w * y,
	         2 * y * z + 2 * w * x,
	         1 - 2 * x**2 - 2 * y**2]])


	class Image(BaseImage):
	    """Image record extended with pose helpers built from ``qvec`` / ``tvec``."""

	    def qvec2rotmat(self):
	        """Return the 3x3 rotation matrix for this image's quaternion."""
	        return qvec2rotmat(self.qvec)

	    @property
	    def world_to_camera(self) -> np.ndarray:
	        """Return a 4x4 homogeneous transform assembled from the pose.

	        The upper-left 3x3 block is the rotation obtained from ``qvec`` and
	        the last column's first three entries are ``tvec``; the bottom row
	        stays ``[0, 0, 0, 1]`` from the identity it starts as.
	        """
	        rotation = qvec2rotmat(self.qvec)
	        pose = np.eye(4)
	        pose[:3, :3], pose[:3, 3] = rotation, self.tvec
	        return pose


	class Camera(BaseCamera):
	    """Camera record that can expose its intrinsics as a 3x3 matrix."""

	    @property
	    def K(self):
	        """Build the 3x3 intrinsic matrix from ``model`` and ``params``.

	        Models listed in ``single_focal`` read ``params`` as (f, cx, cy, ...)
	        and use ``f`` for both focal entries; models in ``dual_focal`` read
	        it as (fx, fy, cx, cy, ...). Any further parameters are ignored.

	        Raises:
	            NotImplementedError: If ``model`` is in neither group.
	        """
	        single_focal = (
	            "SIMPLE_PINHOLE",
	            "SIMPLE_RADIAL",
	            "RADIAL",
	            "SIMPLE_RADIAL_FISHEYE",
	            "RADIAL_FISHEYE",
	        )
	        dual_focal = (
	            "PINHOLE",
	            "OPENCV",
	            "OPENCV_FISHEYE",
	            "FULL_OPENCV",
	            "FOV",
	            "THIN_PRISM_FISHEYE",
	        )

	        # Positions inside ``params`` of (fx, fy, cx, cy) for each layout.
	        if self.model in single_focal:
	            fx_at, fy_at, cx_at, cy_at = 0, 0, 1, 2
	        elif self.model in dual_focal:
	            fx_at, fy_at, cx_at, cy_at = 0, 1, 2, 3
	        else:
	            raise NotImplementedError

	        matrix = np.eye(3)
	        matrix[0, 0] = self.params[fx_at]
	        matrix[1, 1] = self.params[fy_at]
	        matrix[0, 2] = self.params[cx_at]
	        matrix[1, 2] = self.params[cy_at]
	        return matrix


	def read_images_text(path):
	    """Parse an images text file into ``Image`` records keyed by image id.

	    Every record occupies two lines. The first holds, whitespace separated:
	    image id, four quaternion values, three translation values, camera id
	    and image name. The second holds repeated ``x y point3D_id`` triplets.
	    Blank lines and lines starting with ``#`` are skipped when looking for
	    the first line of a record.

	    Args:
	        path: Location of the text file.

	    Returns:
	        dict mapping ``int`` image id to ``Image``.
	    """
	    parsed = {}
	    with open(path, "r") as handle:
	        rows = iter(handle)
	        for raw in rows:
	            header = raw.strip()
	            if not header or header.startswith("#"):
	                continue

	            fields = header.split()
	            image_id = int(fields[0])
	            rotation = np.array([float(v) for v in fields[1:5]])
	            translation = np.array([float(v) for v in fields[5:8]])
	            camera_id = int(fields[8])
	            file_name = fields[9]

	            # The line right after the header always belongs to this record,
	            # even when it is empty (no 2D observations) or missing (EOF).
	            triplets = next(rows, "").split()
	            xs = [float(v) for v in triplets[0::3]]
	            ys = [float(v) for v in triplets[1::3]]
	            xys = np.column_stack([xs, ys])
	            point3D_ids = np.array([int(v) for v in triplets[2::3]])

	            parsed[image_id] = Image(
	                id=image_id,
	                qvec=rotation,
	                tvec=translation,
	                camera_id=camera_id,
	                name=file_name,
	                xys=xys,
	                point3D_ids=point3D_ids,
	            )
	    return parsed


	def read_cameras_text(path):
	    """Parse a cameras text file into ``Camera`` records keyed by camera id.

	    Each non-blank line that does not start with ``#`` is read as
	    ``camera_id model width height param_0 param_1 ...``.

	    see: src/base/reconstruction.cc
	        void Reconstruction::WriteCamerasText(const std::string& path)
	        void Reconstruction::ReadCamerasText(const std::string& path)

	    Args:
	        path: Location of the text file.

	    Returns:
	        dict mapping ``int`` camera id to ``Camera``.
	    """
	    parsed = {}
	    with open(path, "r") as handle:
	        for raw in handle:
	            entry = raw.strip()
	            if not entry or entry.startswith("#"):
	                continue

	            fields = entry.split()
	            camera_id = int(fields[0])
	            parsed[camera_id] = Camera(
	                id=camera_id,
	                model=fields[1],
	                width=int(fields[2]),
	                height=int(fields[3]),
	                params=np.array([float(v) for v in fields[4:]]),
	            )
	    return parsed


	class ScanNetPPDataset:
	    """Accessor for the files of one ScanNet++ sequence under ``./data/scannetpp``.

	    On construction the per-frame camera poses and intrinsics are read from
	    the sequence's ``iphone/colmap`` text files; the ``get_*`` methods then
	    load RGB, depth and segmentation images or the scene point cloud on
	    demand.
	    """

	    def __init__(self, seq_name) -> None:
	        """Set up all paths for ``seq_name`` and load camera metadata.

	        Args:
	            seq_name: Name of the sequence directory below
	                ``./data/scannetpp/data``.
	        """
	        self.seq_name = seq_name
	        self.root = f'./data/scannetpp/data/{seq_name}'
	        self.rgb_dir = f'{self.root}/iphone/rgb'
	        self.depth_dir = f'{self.root}/iphone/render_depth'
	        self.segmentation_dir = f'{self.root}/output/mask'
	        self.object_dict_dir = f'{self.root}/output/object'
	        self.point_cloud_path = f'./data/scannetpp/pcld_0.25/{seq_name}.pth'
	        self.load_meta_data()

	        # Raw depth values are divided by this in get_depth
	        # (presumably millimetres -> metres; confirm against the data).
	        self.depth_scale = 1000.0
	        # Passed to Open3D as (width, height) in get_intrinsics.
	        self.image_size = (1920, 1440)

	    def load_meta_data(self):
	        """Read ``cameras.txt`` / ``images.txt`` and cache per-frame cameras.

	        Populates ``frame_id_list`` (in file order), ``extrinsics`` (inverse
	        of each image's world-to-camera matrix) and ``intrinsics`` (3x3
	        matrix), the latter two keyed by frame id.
	        """
	        self.frame_id_list = []

	        colmap_dir = os.path.join(self.root, 'iphone/colmap')
	        cameras = read_cameras_text(os.path.join(colmap_dir, "cameras.txt"))
	        images = read_images_text(os.path.join(colmap_dir, "images.txt"))

	        # The first camera is used for every frame, and its parameters are
	        # read as (fx, fy, cx, cy, ...). NOTE(review): this only holds for
	        # camera models with separate fx/fy (e.g. PINHOLE / OPENCV); confirm
	        # the dataset never ships a single-focal model.
	        camera = next(iter(cameras.values()))
	        fx, fy, cx, cy = camera.params[:4]
	        intrinsics = {}
	        extrinsics = {}

	        for image in images.values():
	            # The frame id is the number after the first underscore in the
	            # image file name, e.g. "frame_000123.jpg" -> 123.
	            frame_id = int(image.name.split('.')[0].split('_')[1])
	            self.frame_id_list.append(frame_id)
	            extrinsics[frame_id] = np.linalg.inv(image.world_to_camera)
	            intrinsics[frame_id] = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])

	        self.extrinsics = extrinsics
	        self.intrinsics = intrinsics

	    def get_frame_list(self, stride):
	        """Return every ``stride``-th frame id, in the order they were loaded."""
	        return self.frame_id_list[::stride]

	    def get_intrinsics(self, frame_id):
	        """Return the frame's intrinsics as an ``o3d.camera.PinholeCameraIntrinsic``."""
	        matrix = self.intrinsics[frame_id]
	        width, height = self.image_size[0], self.image_size[1]

	        pinhole = o3d.camera.PinholeCameraIntrinsic()
	        pinhole.set_intrinsics(
	            width, height,
	            matrix[0, 0], matrix[1, 1],  # fx, fy
	            matrix[0, 2], matrix[1, 2],  # cx, cy
	        )
	        return pinhole

	    def get_extrinsic(self, frame_id):
	        """Return the cached 4x4 matrix (inverse of world-to-camera) for the frame."""
	        return self.extrinsics[frame_id]

	    def get_depth(self, frame_id):
	        """Load the frame's depth image, divided by ``depth_scale``, as float32.

	        Raises:
	            FileNotFoundError: If the depth image cannot be read.
	        """
	        depth_path = os.path.join(self.depth_dir, 'frame_%06d.png' % frame_id)
	        depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
	        if depth is None:
	            # cv2.imread signals failure by returning None; fail with the
	            # offending path instead of a TypeError from the division below.
	            raise FileNotFoundError(f"Depth image not found or unreadable: {depth_path}")
	        return (depth / self.depth_scale).astype(np.float32)

	    def get_rgb(self, frame_id, change_color=True):
	        """Load the frame's colour image.

	        Args:
	            frame_id: Integer frame id.
	            change_color: When true, convert OpenCV's BGR channel order to
	                RGB; otherwise return the image exactly as read.
	        """
	        rgb_path = os.path.join(self.rgb_dir, 'frame_%06d.jpg' % frame_id)
	        rgb = cv2.imread(rgb_path)
	        if change_color:
	            rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
	        return rgb

	    def get_segmentation(self, frame_id, align_with_depth=False):
	        """Load the frame's segmentation mask with its stored bit depth/channels.

	        Args:
	            frame_id: Integer frame id.
	            align_with_depth: Unused here; kept for interface compatibility.

	        Raises:
	            AssertionError: If the mask file does not exist.
	        """
	        segmentation_path = os.path.join(self.segmentation_dir, 'frame_%06d.png' % frame_id)
	        if not os.path.exists(segmentation_path):
	            # Raised explicitly (not via ``assert``) so the check survives
	            # ``python -O`` while callers still see an AssertionError.
	            raise AssertionError(f"Segmentation not found: {segmentation_path}")
	        return cv2.imread(segmentation_path, cv2.IMREAD_UNCHANGED)

	    def get_frame_path(self, frame_id):
	        """Return ``(rgb_path, segmentation_path)`` for the frame."""
	        frame_stem = 'frame_%06d' % frame_id
	        rgb_path = os.path.join(self.rgb_dir, frame_stem + '.jpg')
	        segmentation_path = os.path.join(self.segmentation_dir, frame_stem + '.png')
	        return rgb_path, segmentation_path

	    def get_label_features(self):
	        """Load the object stored in ``data/text_features/scannetpp.npy``."""
	        # NOTE: allow_pickle=True unpickles arbitrary objects; only use with
	        # feature files from a trusted source.
	        return np.load('data/text_features/scannetpp.npy', allow_pickle=True).item()

	    def get_scene_points(self):
	        """Return the ``'sampled_coords'`` entry of the point-cloud file as an array."""
	        # NOTE: torch.load relies on pickle; only load trusted files.
	        data = torch.load(self.point_cloud_path)
	        return np.asarray(data['sampled_coords'])

	    def get_label_id(self):
	        """Build and return the ``(label2id, id2label)`` lookup dicts.

	        Labels and ids are paired positionally from ``SCANNETPP_LABELS`` and
	        ``SCANNETPP_IDS``; the results are also stored on the instance.
	        """
	        self.class_id = SCANNETPP_IDS
	        self.class_label = SCANNETPP_LABELS

	        self.label2id = dict(zip(self.class_label, self.class_id))
	        self.id2label = {
	            class_id: label
	            for label, class_id in zip(self.class_label, self.class_id)
	        }

	        return self.label2id, self.id2label