Spaces:

ziqima
/

Find3D

Running

App Files Files Community

Find3D / Pointcept /pointcept /datasets /preprocessing /structured3d /preprocess_structured3d.py

ziqima

initial commit

4893ce0 about 1 year ago

raw

history blame contribute delete

14.5 kB

	"""
	Preprocessing Script for Structured3D

	Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
	Please cite our work if the code is helpful to you.
	"""

	import argparse
	import io
	import os
	import PIL
	from PIL import Image
	import cv2
	import zipfile
	import numpy as np
	import multiprocessing as mp
	from concurrent.futures import ProcessPoolExecutor
	from itertools import repeat


	# Raw semantic ids that are kept by the preprocessing. The position of an id
	# in this tuple is the label index written to the output segment array; every
	# other raw id is mapped to the ignore index.
	# NOTE(review): the ids appear to follow the NYU40 numbering - confirm.
	VALID_CLASS_IDS_25 = (
	    1, 2, 3, 4, 5,
	    6, 7, 8, 9, 11,
	    14, 15, 16, 17, 18,
	    19, 22, 24, 25, 32,
	    34, 35, 38, 39, 40,
	)
	# Human-readable class names, index-aligned with VALID_CLASS_IDS_25.
	CLASS_LABELS_25 = (
	    "wall", "floor", "cabinet", "bed", "chair",
	    "sofa", "table", "door", "window", "picture",
	    "desk", "shelves", "curtain", "dresser", "pillow",
	    "mirror", "ceiling", "refrigerator", "television", "nightstand",
	    "sink", "lamp", "otherstructure", "otherfurniture", "otherprop",
	)


	def normal_from_cross_product(points_2d: np.ndarray) -> np.ndarray:
	    """Estimate one unit normal per pixel of an image-organised point map.

	    Args:
	        points_2d: array indexed as (row, column, xyz).

	    Returns:
	        Array of the same shape holding, per pixel, the normalised cross
	        product of the difference to the next row and the difference to the
	        next column. Pixels whose cross product has zero length (this always
	        includes the last row and the last column) get an all-zero normal.
	    """
	    # Repeat the last row and the last column once so that every pixel has a
	    # "next" neighbour; for those border pixels the difference is zero.
	    padded = np.pad(points_2d, ((0, 1), (0, 1), (0, 0)), mode="symmetric")
	    here = padded[:-1, :-1, :]
	    to_next_col = here - padded[:-1, 1:, :]
	    to_next_row = here - padded[1:, :-1, :]

	    cross = np.cross(to_next_row, to_next_col)
	    length = np.linalg.norm(cross, axis=-1, keepdims=True)

	    # Divide only where the length is non-zero; the rest keeps the zero fill.
	    unit = np.zeros_like(cross)
	    np.divide(cross, length, out=unit, where=length != 0)
	    return unit


	class Structured3DReader:
	    """Read-only access to files stored across one or more zip archives.

	    All archives are opened on construction and every member name is mapped to
	    the archive that contains it; if the same name occurs in several archives
	    the last archive wins. The reader can be used as a context manager, or
	    released explicitly with ``close()``.

	    Paths passed to ``listdir``/``read*`` may be built with ``os.path.join``:
	    the platform separator is converted to "/", which is what zip member
	    names always use.
	    """

	    def __init__(self, files):
	        """Open the given archive(s).

	        Args:
	            files: a single path or an iterable of paths to zip files.
	        """
	        super().__init__()
	        if isinstance(files, str):
	            files = [files]
	        self.readers = []
	        self.names_mapper = dict()
	        try:
	            for idx, f in enumerate(files):
	                reader = zipfile.ZipFile(f, "r")
	                self.readers.append(reader)
	                for name in reader.namelist():
	                    self.names_mapper[name] = idx
	        except Exception:
	            # Do not leak the archives that were already opened.
	            self.close()
	            raise

	    def __enter__(self):
	        return self

	    def __exit__(self, exc_type, exc_value, traceback):
	        self.close()
	        return False

	    def close(self):
	        """Close every underlying zip archive (safe to call repeatedly)."""
	        for reader in self.readers:
	            reader.close()

	    @staticmethod
	    def _member_name(path):
	        """Convert a path using the platform separator to a zip member name."""
	        return path.replace(os.path.sep, "/")

	    def filelist(self):
	        """Return the names of all members of all archives."""
	        return list(self.names_mapper.keys())

	    def listdir(self, dir_name):
	        """Return the sorted, de-duplicated direct children of ``dir_name``.

	        Leading and trailing separators of ``dir_name`` are ignored. An unknown
	        directory yields an empty list.
	        """
	        prefix = self._member_name(dir_name).strip("/") + "/"
	        children = {
	            name[len(prefix):].split("/", 1)[0]
	            for name in self.names_mapper
	            if name.startswith(prefix)
	        }
	        # The directory's own entry ("<dir>/") produces an empty child name.
	        children.discard("")
	        return sorted(children)

	    def read(self, file_name):
	        """Return the raw bytes of a member.

	        Raises:
	            KeyError: if no archive contains ``file_name``.
	        """
	        file_name = self._member_name(file_name)
	        split = self.names_mapper[file_name]
	        return self.readers[split].read(file_name)

	    def read_camera(self, camera_path):
	        """Parse a whitespace-separated camera text file.

	        The first three numbers are the camera position; they are divided by
	        1000 (presumably millimetres to metres - confirm) and their axes are
	        permuted (x, y, z) -> (y, z, x). If more numbers are present, values
	        3:6 and 6:9 are used as front and up vectors to build the rotation, and
	        values 9:11 are returned as ``cam_f``.

	        Returns:
	            ``(cam_r, cam_t, cam_f)``; ``cam_r`` is the identity and ``cam_f``
	            is ``None`` when the file holds only a position.
	        """
	        z2y_top_m = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]], dtype=np.float32)
	        cam_extr = np.fromstring(self.read(camera_path), dtype=np.float32, sep=" ")
	        cam_t = np.matmul(z2y_top_m, cam_extr[:3] / 1000)
	        if cam_extr.shape[0] > 3:
	            cam_front, cam_up = cam_extr[3:6], cam_extr[6:9]
	            cam_n = np.cross(cam_front, cam_up)
	            cam_r = np.stack((cam_front, cam_up, cam_n), axis=1).astype(np.float32)
	            cam_r = np.matmul(z2y_top_m, cam_r)
	            cam_f = cam_extr[9:11]
	        else:
	            cam_r = np.eye(3, dtype=np.float32)
	            cam_f = None
	        return cam_r, cam_t, cam_f

	    def read_depth(self, depth_path):
	        """Decode a depth image, unchanged, with a trailing channel axis.

	        Zero depth values are replaced by 65535 so that they can be rejected
	        with a ``depth < 65535`` test.
	        """
	        depth = cv2.imdecode(
	            np.frombuffer(self.read(depth_path), np.uint8), cv2.IMREAD_UNCHANGED
	        )[..., np.newaxis]
	        depth[depth == 0] = 65535
	        return depth

	    def read_color(self, color_path):
	        """Decode a colour image, keeping the first three channels reversed.

	        OpenCV decodes to BGR(A) order, so the result is RGB.
	        """
	        color = cv2.imdecode(
	            np.frombuffer(self.read(color_path), np.uint8), cv2.IMREAD_UNCHANGED
	        )[..., :3][..., ::-1]
	        return color

	    def read_segment(self, segment_path):
	        """Load a semantic image through PIL and append a trailing axis."""
	        segment = np.array(PIL.Image.open(io.BytesIO(self.read(segment_path))))[
	            ..., np.newaxis
	        ]
	        return segment


	def _valid_point_mask(coord, normal, depth, segment):
	    """Return a flat boolean mask of the pixels of one view that are kept.

	    A pixel is kept when the absolute cosine between its viewing ray and its
	    estimated normal exceeds 0.15, its depth is below the 65535 sentinel and
	    its semantic id is non-zero.
	    """
	    view_dist = np.maximum(
	        np.linalg.norm(coord, axis=-1, keepdims=True), float(10e-5)
	    )
	    cosine_dist = np.sum((coord * normal / view_dist), axis=-1, keepdims=True)
	    cosine_dist = np.abs(cosine_dist)
	    return ((cosine_dist > 0.15) & (depth < 65535) & (segment > 0))[
	        ..., 0
	    ].reshape(-1)


	def _read_view(reader, camera_path, image_dir):
	    """Load camera, depth, colour and semantic data of a single view."""
	    cam_r, cam_t, cam_f = reader.read_camera(camera_path)
	    depth = reader.read_depth(os.path.join(image_dir, "depth.png"))
	    color = reader.read_color(os.path.join(image_dir, "rgb_rawlight.png"))
	    segment = reader.read_segment(os.path.join(image_dir, "semantic.png"))
	    return cam_r, cam_t, cam_f, depth, color, segment


	def _perspective_points(depth, segment, cam_r, cam_t, cam_f):
	    """Back-project a perspective depth map; return (coord, normal, mask)."""
	    fx, fy = cam_f
	    height, width = depth.shape[0], depth.shape[1]
	    # Homogeneous pixel coordinates (x, y, 1), row-major over the image.
	    pixel = np.transpose(np.indices((width, height)), (2, 1, 0))
	    pixel = pixel.reshape((-1, 2))
	    pixel = np.hstack((pixel, np.ones((pixel.shape[0], 1))))
	    # Pinhole intrinsics: principal point at the image centre, focal lengths
	    # derived from the tangent of the two camera_pose values.
	    k = np.diag([1.0, 1.0, 1.0])
	    k[0, 2] = width / 2
	    k[1, 2] = height / 2
	    k[0, 0] = k[0, 2] / np.tan(fx)
	    k[1, 1] = k[1, 2] / np.tan(fy)
	    coord = (depth.reshape((-1, 1)) * (np.linalg.inv(k) @ pixel.T).T).reshape(
	        height, width, 3
	    )
	    coord = coord @ np.array([[0, 0, 1], [0, -1, 0], [1, 0, 0]])
	    # The filter is evaluated in the camera frame, before the pose is applied.
	    normal = normal_from_cross_product(coord)
	    mask = _valid_point_mask(coord, normal, depth, segment)

	    coord = np.matmul(coord / 1000, cam_r.T) + cam_t
	    normal = normal_from_cross_product(coord)
	    return coord, normal, mask


	def _panorama_points(depth, segment, cam_t):
	    """Back-project an equirectangular depth map; return (coord, normal, mask)."""
	    p_h, p_w = depth.shape[:2]
	    # Per-column and per-row angles of the panorama pixels.
	    p_a = np.arange(p_w, dtype=np.float32) / p_w * 2 * np.pi - np.pi
	    p_b = np.arange(p_h, dtype=np.float32) / p_h * np.pi * -1 + np.pi / 2
	    p_a = np.tile(p_a[None], [p_h, 1])[..., np.newaxis]
	    p_b = np.tile(p_b[:, None], [1, p_w])[..., np.newaxis]
	    p_a_sin, p_a_cos = np.sin(p_a), np.cos(p_a)
	    p_b_sin, p_b_cos = np.sin(p_b), np.cos(p_b)
	    x = depth * p_a_cos * p_b_cos
	    y = depth * p_b_sin
	    z = depth * p_a_sin * p_b_cos
	    coord = np.concatenate([x, y, z], axis=-1) / 1000
	    normal = normal_from_cross_product(coord)
	    mask = _valid_point_mask(coord, normal, depth, segment)
	    # Only a translation is applied, so the normals stay valid.
	    coord = coord + cam_t
	    return coord, normal, mask


	def _select_points(coord, color, normal, segment, mask):
	    """Flatten the per-pixel arrays of a view and keep the masked pixels."""
	    return (
	        coord.reshape(-1, 3)[mask],
	        color.reshape(-1, 3)[mask],
	        normal.reshape(-1, 3)[mask],
	        segment.reshape(-1, 1)[mask],
	    )


	def parse_scene(
	    scene,
	    dataset_root,
	    output_root,
	    ignore_index=-1,
	    grid_size=None,
	    fuse_prsp=True,
	    fuse_pano=True,
	    vis=False,
	):
	    """Fuse the rendered views of every room of one scene into point clouds.

	    For each room the perspective frames and/or the panorama are back-projected,
	    filtered, concatenated and written as ``coord.npy``, ``color.npy``,
	    ``normal.npy`` and ``segment.npy`` under
	    ``<output_root>/<split>/<scene>/room_<room>``. The split is derived from the
	    numeric suffix of the scene name (< 3000 train, < 3250 val, otherwise test).
	    Views that fail to load, or that contain no valid point, are skipped with a
	    printed message.

	    Args:
	        scene: scene folder name; the part after the last "_" must be an integer.
	        dataset_root: directory containing the ``*.zip`` archives.
	        output_root: directory receiving the per-split output folders.
	        ignore_index: label written for semantic ids outside VALID_CLASS_IDS_25.
	        grid_size: if given, keep one point per cell of this edge length.
	        fuse_prsp: include the perspective frames.
	        fuse_pano: include the panorama.
	        vis: additionally export coloured ``.ply`` files to ``./vis``.

	    Raises:
	        AssertionError: if both ``fuse_prsp`` and ``fuse_pano`` are false.
	    """
	    assert fuse_prsp or fuse_pano
	    reader = Structured3DReader(
	        [
	            os.path.join(dataset_root, f)
	            for f in os.listdir(dataset_root)
	            if f.endswith(".zip")
	        ]
	    )
	    try:
	        scene_id = int(os.path.basename(scene).split("_")[-1])
	        if scene_id < 3000:
	            split = "train"
	        elif 3000 <= scene_id < 3250:
	            split = "val"
	        else:
	            split = "test"

	        print(f"Processing: {scene} in {split}")
	        rooms = reader.listdir(os.path.join("Structured3D", scene, "2D_rendering"))
	        for room in rooms:
	            room_path = os.path.join("Structured3D", scene, "2D_rendering", room)
	            # One (coord, color, normal, segment) tuple per accepted view.
	            views = []

	            if fuse_prsp:
	                prsp_path = os.path.join(room_path, "perspective", "full")
	                for frame in reader.listdir(prsp_path):
	                    frame_dir = os.path.join(prsp_path, frame)
	                    try:
	                        cam_r, cam_t, cam_f, depth, color, segment = _read_view(
	                            reader,
	                            os.path.join(frame_dir, "camera_pose.txt"),
	                            frame_dir,
	                        )
	                        if cam_f is None:
	                            # A pose without field-of-view values cannot be
	                            # back-projected; treat it like any unreadable view.
	                            raise ValueError("camera pose has no field of view")
	                    except Exception:
	                        print(
	                            f"Skipping {scene}_room{room}_frame{frame} perspective view due to loading error"
	                        )
	                        continue

	                    coord, normal, mask = _perspective_points(
	                        depth, segment, cam_r, cam_t, cam_f
	                    )
	                    if mask.any():
	                        views.append(
	                            _select_points(coord, color, normal, segment, mask)
	                        )
	                    else:
	                        print(
	                            f"Skipping {scene}_room{room}_frame{frame} perspective view due to all points are filtered out"
	                        )

	            if fuse_pano:
	                pano_path = os.path.join(room_path, "panorama")
	                try:
	                    _, cam_t, _, depth, color, segment = _read_view(
	                        reader,
	                        os.path.join(pano_path, "camera_xyz.txt"),
	                        os.path.join(pano_path, "full"),
	                    )
	                except Exception:
	                    print(
	                        f"Skipping {scene}_room{room} panorama view due to loading error"
	                    )
	                else:
	                    coord, normal, mask = _panorama_points(depth, segment, cam_t)
	                    if mask.any():
	                        views.append(
	                            _select_points(coord, color, normal, segment, mask)
	                        )
	                    else:
	                        print(
	                            f"Skipping {scene}_room{room} panorama view due to all points are filtered out"
	                        )

	            if not views:
	                print(f"Skipping {scene}_room{room} due to no valid points")
	                continue

	            # Swap the second and third axes of the fused coordinates/normals.
	            swap_yz = np.array([[1, 0, 0], [0, 0, 1], [0, 1, 0]])
	            coord = np.concatenate([v[0] for v in views], axis=0) @ swap_yz
	            color = np.concatenate([v[1] for v in views], axis=0)
	            normal = np.concatenate([v[2] for v in views], axis=0) @ swap_yz
	            segment = np.concatenate([v[3] for v in views], axis=0)

	            # Remap raw semantic ids to their index in VALID_CLASS_IDS_25.
	            segment25 = np.full(segment.shape, ignore_index, dtype=np.int64)
	            for idx, value in enumerate(VALID_CLASS_IDS_25):
	                mask = np.all(segment == value, axis=-1)
	                segment25[mask] = idx

	            data_dict = dict(
	                coord=coord.astype(np.float32),
	                color=color.astype(np.uint8),
	                normal=normal.astype(np.float32),
	                segment=segment25.astype(np.int16),
	            )
	            # Grid sampling data
	            if grid_size is not None:
	                grid_coord = np.floor(coord / grid_size).astype(int)
	                _, idx = np.unique(grid_coord, axis=0, return_index=True)
	                # Keep the arrays used for visualisation in step with the
	                # saved ones, otherwise their lengths no longer match.
	                coord = coord[idx]
	                color = color[idx]
	                normal = normal[idx]
	                for key in data_dict:
	                    data_dict[key] = data_dict[key][idx]

	            # Save data
	            save_path = os.path.join(
	                output_root, split, os.path.basename(scene), f"room_{room}"
	            )
	            os.makedirs(save_path, exist_ok=True)
	            for key, value in data_dict.items():
	                np.save(os.path.join(save_path, f"{key}.npy"), value)

	            if vis:
	                from pointcept.utils.visualization import save_point_cloud

	                os.makedirs("./vis", exist_ok=True)
	                save_point_cloud(
	                    coord, color / 255, f"./vis/{scene}_room{room}_color.ply"
	                )
	                save_point_cloud(
	                    coord, (normal + 1) / 2, f"./vis/{scene}_room{room}_normal.ply"
	                )
	    finally:
	        # Release the zip handles opened for this scene.
	        for archive in reader.readers:
	            archive.close()


	if __name__ == "__main__":
	    # Command-line entry point: list the scenes found in the zip archives and
	    # preprocess them in parallel, one scene per task.
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        "--dataset_root",
	        required=True,
	        help="Path to the folder containing the Structured3D zip archives.",
	    )
	    parser.add_argument(
	        "--output_root",
	        required=True,
	        help="Output path where train/val/test folders will be located.",
	    )
	    parser.add_argument(
	        "--num_workers",
	        default=mp.cpu_count(),
	        type=int,
	        help="Num workers for preprocessing.",
	    )
	    parser.add_argument(
	        "--grid_size", default=None, type=float, help="Grid size for grid sampling."
	    )
	    # The ignore index is a label value, so it is parsed as an integer.
	    parser.add_argument("--ignore_index", default=-1, type=int, help="Ignore index.")
	    parser.add_argument(
	        "--fuse_prsp", action="store_true", help="Whether fuse perspective view."
	    )
	    parser.add_argument(
	        "--fuse_pano", action="store_true", help="Whether fuse panorama view."
	    )
	    config = parser.parse_args()
	    # parse_scene asserts that at least one source is enabled; report the
	    # problem here instead of failing inside the worker processes.
	    if not (config.fuse_prsp or config.fuse_pano):
	        parser.error("at least one of --fuse_prsp and --fuse_pano is required")

	    # The reader is only needed to enumerate the scenes; every worker opens
	    # its own archives, so close these handles before the pool starts.
	    reader = Structured3DReader(
	        [
	            os.path.join(config.dataset_root, f)
	            for f in os.listdir(config.dataset_root)
	            if f.endswith(".zip")
	        ]
	    )
	    try:
	        scenes_list = sorted(reader.listdir("Structured3D"))
	    finally:
	        for archive in reader.readers:
	            archive.close()

	    for split_name in ("train", "val", "test"):
	        os.makedirs(os.path.join(config.output_root, split_name), exist_ok=True)

	    # Preprocess data.
	    print("Processing scenes...")
	    # The context manager shuts the pool down even if a worker raises.
	    with ProcessPoolExecutor(max_workers=config.num_workers) as pool:
	        _ = list(
	            pool.map(
	                parse_scene,
	                scenes_list,
	                repeat(config.dataset_root),
	                repeat(config.output_root),
	                repeat(config.ignore_index),
	                repeat(config.grid_size),
	                repeat(config.fuse_prsp),
	                repeat(config.fuse_pano),
	            )
	        )