Spaces:

allenai
/

WildDet3D

Running on Zero

App Files Files Community

WildDet3D / vis3d_glb.py

weikaih

WildDet3D Gradio demo

f71ac1d verified 17 days ago

raw

history blame contribute delete

21.1 kB

	"""Generate GLB scenes with colored point clouds / textured meshes and 3D boxes.

	Uses pygltflib for point cloud + wireframe boxes (GL_POINTS/GL_LINES),
	and trimesh + utils3d for textured mesh generation.

	Usage:
	from vis3d_glb import depth_to_pointcloud, create_scene_glb
	from vis3d_glb import create_mesh_scene_glb

	# Point cloud mode
	points, colors = depth_to_pointcloud(depth_map, image, intrinsics)
	create_scene_glb(points, colors, boxes3d_list, output_path)

	# Textured mesh mode (like MoGe2)
	create_mesh_scene_glb(depth_map, image, intrinsics, boxes3d_list, output_path)
	"""

	import numpy as np
	import pygltflib


	def depth_to_pointcloud(
	depth_map: np.ndarray,
	image: np.ndarray,
	intrinsics: np.ndarray,
	max_depth: float = 20.0,
	subsample: int = 4,
	padding: tuple[int, int, int, int] \| None = None,
	remove_edge: bool = True,
	edge_rtol: float = 0.04,
	confidence_map: np.ndarray \| None = None,
	confidence_threshold: float = 0.0,
	) -> tuple[np.ndarray, np.ndarray]:
	"""Convert depth map + RGB image to colored point cloud.

	Args:
	depth_map: (H, W) or (1, H, W) depth in meters.
	image: (H, W, 3) RGB image, uint8 [0-255].
	intrinsics: (3, 3) camera intrinsics matrix.
	max_depth: Discard points beyond this depth.
	subsample: Take every Nth pixel to reduce point count.
	padding: (left, right, top, bottom) CenterPad offsets to exclude.
	remove_edge: Remove points at depth discontinuity edges
	(like MoGe2). Uses utils3d.np.depth_map_edge.
	edge_rtol: Relative tolerance for edge detection. Larger
	values remove more aggressive edges.
	confidence_map: (H, W) or (1, H, W) per-pixel confidence in
	[0, 1]. Points below confidence_threshold are discarded.
	confidence_threshold: Minimum confidence to keep a point.

	Returns:
	points: (N, 3) float32 xyz in camera frame.
	colors: (N, 4) uint8 RGBA.
	"""
	# Handle various depth_map shapes
	while depth_map.ndim > 2:
	depth_map = depth_map.squeeze(0) # (1, 1, H, W) -> (H, W)

	H, W = depth_map.shape

	# Handle confidence_map shape
	if confidence_map is not None:
	while confidence_map.ndim > 2:
	confidence_map = confidence_map.squeeze(0)

	# Handle various image shapes: (1, H, W, 3), (1, 1, H, W) etc
	while image.ndim > 3:
	image = image.squeeze(0)
	# If image is (3, H, W), transpose to (H, W, 3)
	if image.ndim == 3 and image.shape[0] in (1, 3):
	image = np.transpose(image, (1, 2, 0))
	# If grayscale (H, W), repeat to (H, W, 3)
	if image.ndim == 2:
	image = np.stack([image] * 3, axis=-1)

	# Match image size to depth map
	if image.shape[0] != H or image.shape[1] != W:
	from PIL import Image as PILImage
	img_pil = PILImage.fromarray(image)
	img_pil = img_pil.resize((W, H), PILImage.BILINEAR)
	image = np.array(img_pil)

	# Build full-resolution valid mask before subsampling
	full_valid = (depth_map > 0.01) & (depth_map < max_depth) & np.isfinite(depth_map)

	# Exclude padding regions (full resolution)
	if padding is not None:
	pad_left, pad_right, pad_top, pad_bottom = padding
	pad_mask = np.ones((H, W), dtype=bool)
	pad_mask[:, :pad_left] = False
	pad_mask[:pad_top, :] = False
	if pad_right > 0:
	pad_mask[:, W - pad_right:] = False
	if pad_bottom > 0:
	pad_mask[H - pad_bottom:, :] = False
	full_valid &= pad_mask

	# Remove depth discontinuity edges (MoGe2 style)
	if remove_edge:
	import utils3d
	edge_mask = utils3d.np.depth_map_edge(depth_map, rtol=edge_rtol)
	full_valid &= ~edge_mask

	# Filter by confidence
	if confidence_map is not None and confidence_threshold > 0:
	full_valid &= (confidence_map >= confidence_threshold)

	# Subsample grid
	ys = np.arange(0, H, subsample)
	xs = np.arange(0, W, subsample)
	xx, yy = np.meshgrid(xs, ys)

	depth_sub = depth_map[yy, xx]
	rgb_sub = image[yy, xx] # (h, w, 3)
	valid = full_valid[yy, xx]

	# Unproject to 3D
	fx, fy = intrinsics[0, 0], intrinsics[1, 1]
	cx, cy = intrinsics[0, 2], intrinsics[1, 2]

	x3d = (xx[valid] - cx) * depth_sub[valid] / fx
	y3d = (yy[valid] - cy) * depth_sub[valid] / fy
	z3d = depth_sub[valid]

	# OpenCV (x-right, y-down, z-away) to glTF (x, -y, -z)
	points = np.stack([x3d, -y3d, -z3d], axis=-1).astype(np.float32)

	# Colors
	rgb = rgb_sub[valid] # (N, 3) uint8
	alpha = np.full((rgb.shape[0], 1), 255, dtype=np.uint8)
	colors = np.concatenate([rgb, alpha], axis=-1) # (N, 4)

	return points, colors


	def _quaternion_to_rotation_matrix(qw, qx, qy, qz):
	    """Convert a (w, x, y, z) quaternion to a 3x3 rotation matrix.

	    The quaternion is normalised first, so slightly non-unit inputs still
	    yield a proper rotation. A (near-)zero quaternion carries no
	    orientation and maps to the identity.

	    Args:
	        qw, qx, qy, qz: Scalar quaternion components, scalar part first.

	    Returns:
	        (3, 3) float32 rotation matrix.
	    """
	    qw, qx, qy, qz = (float(c) for c in (qw, qx, qy, qz))
	    norm = (qw * qw + qx * qx + qy * qy + qz * qz) ** 0.5
	    if norm < 1e-12:
	        return np.eye(3, dtype=np.float32)
	    qw, qx, qy, qz = qw / norm, qx / norm, qy / norm, qz / norm

	    return np.array([
	        [1 - 2 * (qy * qy + qz * qz), 2 * (qx * qy - qz * qw), 2 * (qx * qz + qy * qw)],
	        [2 * (qx * qy + qz * qw), 1 - 2 * (qx * qx + qz * qz), 2 * (qy * qz - qx * qw)],
	        [2 * (qx * qz - qy * qw), 2 * (qy * qz + qx * qw), 1 - 2 * (qx * qx + qy * qy)],
	    ], dtype=np.float32)


	def boxes3d_to_corners(boxes3d: np.ndarray) -> list[np.ndarray]:
	    """Compute the 8 corners of each 3D box, expressed in glTF axes.

	    Each row of ``boxes3d`` is read as
	    ``[cx, cy, cz, e3, e4, e5, qw, qx, qy, qz]`` in the OpenCV camera
	    frame. The three extents are applied in the box's local frame as:
	    index 4 along local x, index 5 along local y, index 3 along local z.
	    NOTE(review): which of these the upstream format calls width / length /
	    height cannot be confirmed from this file -- verify against the caller.

	    Args:
	        boxes3d: (N, 10) box parameters.

	    Returns:
	        List of N (8, 3) float32 corner arrays. Corners 0-3 lie on the
	        local -z face and 4-7 on the local +z face, in matching order.
	    """
	    # Same axis flip as the point cloud: OpenCV (x, y, z) -> glTF (x, -y, -z)
	    cv_to_gltf = np.diag([1.0, -1.0, -1.0]).astype(np.float32)

	    # Sign of each corner along local (x, y, z)
	    corner_signs = np.array([
	        [-1, -1, -1],
	        [1, -1, -1],
	        [1, 1, -1],
	        [-1, 1, -1],
	        [-1, -1, 1],
	        [1, -1, 1],
	        [1, 1, 1],
	        [-1, 1, 1],
	    ], dtype=np.float32)

	    result = []
	    for params in boxes3d:
	        center = np.array([params[0], params[1], params[2]])
	        half_extents = np.array(
	            [params[4] / 2, params[5] / 2, params[3] / 2],
	            dtype=np.float32,
	        )
	        local = corner_signs * half_extents

	        # Rotate and translate in OpenCV coordinates
	        rotation = _quaternion_to_rotation_matrix(
	            params[6], params[7], params[8], params[9]
	        )
	        placed_cv = (rotation @ local.T).T + center

	        # Flip into glTF axes
	        placed_gltf = (cv_to_gltf @ placed_cv.T).T
	        result.append(placed_gltf.astype(np.float32))

	    return result


	def _generate_box_colors(n_boxes: int) -> list[list[int]]:
	    """Return one RGBA color per box, cycling through an 8-entry palette."""
	    palette = [
	        [255, 0, 0, 255],  # red
	        [0, 255, 0, 255],  # green
	        [0, 100, 255, 255],  # blue
	        [255, 255, 0, 255],  # yellow
	        [255, 0, 255, 255],  # magenta
	        [0, 255, 255, 255],  # cyan
	        [255, 128, 0, 255],  # orange
	        [128, 0, 255, 255],  # purple
	    ]
	    # Box k gets palette entry k modulo the palette size.
	    return [palette[k % len(palette)] for k in range(n_boxes)]


	def _pad_to_4(data: bytes) -> bytes:
	    """Right-pad ``data`` with zero bytes up to the next multiple of 4 bytes."""
	    missing = -len(data) % 4  # 0 when the length is already aligned
	    return data + b"\x00" * missing if missing else data


	def create_scene_glb(
	    points: np.ndarray,
	    point_colors: np.ndarray,
	    boxes3d_list: list[np.ndarray],
	    output_path: str,
	    max_points: int = 500000,
	) -> str:
	    """Create a GLB file with a colored point cloud + wireframe 3D boxes.

	    The point cloud is written as a POINTS primitive (mode 0) and all boxes
	    together as one indexed LINES primitive (mode 1). A primitive is only
	    emitted when it has data: glTF does not allow accessors with
	    ``count == 0`` or zero-length buffer views, so an empty point cloud (or
	    an empty box list) is simply left out of the file.

	    Args:
	        points: (N, 3) float32 point cloud xyz.
	        point_colors: (N, 4) uint8 RGBA colors. (N, 3) RGB is also accepted
	            and gets an opaque alpha channel appended.
	        boxes3d_list: List of (M, 10) box arrays (one per image), passed to
	            ``boxes3d_to_corners``.
	        output_path: Where to save the .glb file.
	        max_points: Max number of points to include; larger clouds are
	            randomly subsampled without replacement.

	    Returns:
	        output_path.

	    Raises:
	        ValueError: If ``points`` and ``point_colors`` differ in length.
	    """
	    if len(points) != len(point_colors):
	        raise ValueError(
	            f"points ({len(points)}) and point_colors "
	            f"({len(point_colors)}) must have the same length"
	        )

	    # Subsample points if too many
	    if len(points) > max_points:
	        idx = np.random.choice(len(points), max_points, replace=False)
	        points = points[idx]
	        point_colors = point_colors[idx]

	    points = np.ascontiguousarray(points, dtype=np.float32)
	    point_colors = np.ascontiguousarray(point_colors, dtype=np.uint8)
	    n_points = len(points)

	    # The color accessor is declared VEC4; promote RGB to RGBA so the
	    # binary layout matches the declaration.
	    if point_colors.ndim == 2 and point_colors.shape[1] == 3:
	        opaque = np.full((n_points, 1), 255, dtype=np.uint8)
	        point_colors = np.ascontiguousarray(
	            np.concatenate([point_colors, opaque], axis=1)
	        )

	    # Collect box corners across all images
	    all_corners_list = []
	    for boxes3d in boxes3d_list:
	        if len(boxes3d) > 0:
	            all_corners_list.extend(boxes3d_to_corners(boxes3d))
	    n_boxes = len(all_corners_list)

	    chunks = []  # 4-byte aligned binary segments, in buffer order
	    buffer_views = []
	    accessors = []
	    meshes = []
	    nodes = []

	    def add_accessor(data, target, component_type, accessor_type, **extra):
	        """Append ``data`` to the blob; return the index of its accessor."""
	        raw = _pad_to_4(data.tobytes())
	        buffer_views.append(pygltflib.BufferView(
	            buffer=0,
	            byteOffset=sum(len(c) for c in chunks),
	            byteLength=len(raw),
	            target=target,
	        ))
	        accessors.append(pygltflib.Accessor(
	            bufferView=len(buffer_views) - 1,
	            componentType=component_type,
	            count=len(data),
	            type=accessor_type,
	            **extra,
	        ))
	        chunks.append(raw)
	        return len(accessors) - 1

	    def add_mesh(primitive):
	        """Register a single-primitive mesh and a node that references it."""
	        meshes.append(pygltflib.Mesh(primitives=[primitive]))
	        nodes.append(pygltflib.Node(mesh=len(meshes) - 1))

	    if n_points > 0:
	        pos_acc = add_accessor(
	            points, pygltflib.ARRAY_BUFFER, pygltflib.FLOAT, pygltflib.VEC3,
	            max=points.max(axis=0).tolist(),
	            min=points.min(axis=0).tolist(),
	        )
	        col_acc = add_accessor(
	            point_colors, pygltflib.ARRAY_BUFFER,
	            pygltflib.UNSIGNED_BYTE, pygltflib.VEC4,
	            normalized=True,
	        )
	        # Point cloud mesh (GL_POINTS = mode 0)
	        add_mesh(pygltflib.Primitive(
	            attributes=pygltflib.Attributes(POSITION=pos_acc, COLOR_0=col_acc),
	            mode=0,
	        ))

	    if n_boxes > 0:
	        edge_pairs = np.array([
	            (0, 1), (1, 2), (2, 3), (3, 0),  # corners 0-3 face
	            (4, 5), (5, 6), (6, 7), (7, 4),  # corners 4-7 face
	            (0, 4), (1, 5), (2, 6), (3, 7),  # edges joining the two faces
	        ], dtype=np.int64)

	        box_verts = np.concatenate(all_corners_list, axis=0).astype(np.float32)
	        box_vert_colors = np.repeat(
	            np.array(_generate_box_colors(n_boxes), dtype=np.uint8), 8, axis=0
	        )

	        # Each box owns 8 consecutive vertices. 16-bit indices only reach
	        # 65534 (65535 is reserved), so fall back to 32-bit for huge scenes
	        # instead of silently wrapping around.
	        if len(box_verts) <= 65535:
	            index_dtype, index_type = np.uint16, pygltflib.UNSIGNED_SHORT
	        else:
	            index_dtype, index_type = np.uint32, pygltflib.UNSIGNED_INT
	        first_vertex = 8 * np.arange(n_boxes, dtype=np.int64)
	        box_indices = (
	            edge_pairs[None, :, :] + first_vertex[:, None, None]
	        ).reshape(-1).astype(index_dtype)

	        vert_acc = add_accessor(
	            box_verts, pygltflib.ARRAY_BUFFER, pygltflib.FLOAT, pygltflib.VEC3,
	            max=box_verts.max(axis=0).tolist(),
	            min=box_verts.min(axis=0).tolist(),
	        )
	        bcol_acc = add_accessor(
	            box_vert_colors, pygltflib.ARRAY_BUFFER,
	            pygltflib.UNSIGNED_BYTE, pygltflib.VEC4,
	            normalized=True,
	        )
	        idx_acc = add_accessor(
	            box_indices, pygltflib.ELEMENT_ARRAY_BUFFER,
	            index_type, pygltflib.SCALAR,
	            max=[int(box_indices.max())],
	            min=[int(box_indices.min())],
	        )
	        # Box wireframe mesh (GL_LINES = mode 1)
	        add_mesh(pygltflib.Primitive(
	            attributes=pygltflib.Attributes(POSITION=vert_acc, COLOR_0=bcol_acc),
	            indices=idx_acc,
	            mode=1,
	        ))

	    blob = b"".join(chunks)

	    gltf = pygltflib.GLTF2(
	        scene=0,
	        scenes=[pygltflib.Scene(nodes=list(range(len(nodes))))],
	        nodes=nodes,
	        meshes=meshes,
	        accessors=accessors,
	        bufferViews=buffer_views,
	        # A zero-length buffer is not valid glTF; omit it for empty scenes.
	        buffers=[pygltflib.Buffer(byteLength=len(blob))] if blob else [],
	    )
	    if blob:
	        gltf.set_binary_blob(blob)
	    gltf.save(output_path)

	    return output_path


	def _create_edge_cylinder(p1, p2, radius=0.01, sections=6):
	    """Build a thin cylinder mesh spanning the segment from p1 to p2.

	    The cylinder is created with its axis along +Z and height equal to the
	    segment length, rotated so the axis follows the segment, and centred on
	    the segment midpoint.

	    Args:
	        p1, p2: (3,) endpoints.
	        radius: cylinder radius.
	        sections: number of radial segments.

	    Returns:
	        trimesh.Trimesh, or None when the endpoints are closer than 1e-6.
	    """
	    import trimesh

	    delta = p2 - p1
	    edge_len = float(np.linalg.norm(delta))
	    if edge_len < 1e-6:
	        return None

	    tube = trimesh.creation.cylinder(
	        radius=radius, height=edge_len, sections=sections
	    )
	    axis = delta / edge_len

	    # Rotation taking +Z onto the segment direction
	    up = np.array([0, 0, 1], dtype=np.float64)
	    normal = np.cross(up, axis)
	    cos_theta = np.dot(up, axis)
	    normal_len = np.linalg.norm(normal)

	    if normal_len < 1e-6:
	        # Segment is (anti-)parallel to Z: keep as is, or turn 180 degrees
	        # about X for the opposite direction.
	        if cos_theta > 0:
	            rot = np.eye(3)
	        else:
	            rot = np.diag([1.0, -1.0, -1.0])
	    else:
	        # Rodrigues' formula about the unit axis perpendicular to both
	        k = normal / normal_len
	        theta = np.arccos(np.clip(cos_theta, -1, 1))
	        skew = np.array([
	            [0, -k[2], k[1]],
	            [k[2], 0, -k[0]],
	            [-k[1], k[0], 0],
	        ])
	        rot = np.eye(3) + np.sin(theta) * skew + (1 - np.cos(theta)) * (skew @ skew)

	    pose = np.eye(4)
	    pose[:3, :3] = rot
	    pose[:3, 3] = (p1 + p2) / 2.0
	    tube.apply_transform(pose)
	    return tube


	def _create_wireframe_box_trimesh(corners, color_rgba, radius=0.015):
	    """Render the 12 edges of a box as thin, uniformly colored cylinders.

	    Args:
	        corners: (8, 3) corner positions in glTF coords.
	        color_rgba: [R, G, B, A] uint8 color applied to every face.
	        radius: cylinder radius in meters.

	    Returns:
	        A single concatenated trimesh.Trimesh, or None when every edge is
	        degenerate.
	    """
	    import trimesh

	    # Corner index pairs: two quads (0-3 and 4-7) plus the four connectors
	    box_edges = (
	        (0, 1), (1, 2), (2, 3), (3, 0),
	        (4, 5), (5, 6), (6, 7), (7, 4),
	        (0, 4), (1, 5), (2, 6), (3, 7),
	    )

	    tubes = []
	    for start, end in box_edges:
	        tube = _create_edge_cylinder(
	            corners[start].astype(np.float64),
	            corners[end].astype(np.float64),
	            radius=radius,
	            sections=6,
	        )
	        if tube is None:
	            # Degenerate (zero-length) edge: nothing to draw
	            continue
	        tube.visual.face_colors = color_rgba
	        tubes.append(tube)

	    if not tubes:
	        return None
	    return trimesh.util.concatenate(tubes)


	def create_mesh_scene_glb(
	    depth_map: np.ndarray,
	    image: np.ndarray,
	    intrinsics: np.ndarray,
	    boxes3d_list: list[np.ndarray],
	    output_path: str,
	    max_depth: float = 20.0,
	    padding: tuple[int, int, int, int] | None = None,
	    remove_edge: bool = True,
	    edge_rtol: float = 0.04,
	) -> str:
	    """Create a GLB with a textured mesh + wireframe 3D boxes.

	    The depth map is unprojected with the pinhole intrinsics, triangulated
	    with ``utils3d.np.build_mesh_from_map`` and textured with the image.
	    Boxes are added as thin cylinder wireframes. When no valid pixel
	    survives the masking, the mesh is skipped but the boxes are still
	    written.

	    Args:
	        depth_map: (H, W) depth in meters; leading singleton axes are
	            squeezed away.
	        image: RGB uint8 [0-255]. Accepted layouts are (H, W, 3),
	            (H, W, 4), (H, W, 1), (H, W), channel-first (3, H, W) /
	            (1, H, W), each optionally with leading singleton axes.
	            Single-channel images are replicated to 3 channels, extra
	            channels are dropped, and the image is resized to the depth
	            map resolution when the sizes differ.
	        intrinsics: (3, 3) camera intrinsics.
	        boxes3d_list: List of (M, 10) box arrays, passed to
	            ``boxes3d_to_corners``.
	        output_path: Where to save .glb.
	        max_depth: Pixels at or beyond this depth are masked out.
	        padding: (left, right, top, bottom) border widths to exclude.
	        remove_edge: Mask pixels flagged by ``utils3d.np.depth_map_edge``.
	        edge_rtol: Forwarded as ``rtol`` to ``utils3d.np.depth_map_edge``.

	    Returns:
	        output_path.
	    """
	    import utils3d
	    import trimesh
	    from PIL import Image as PILImage

	    # Prepare depth
	    while depth_map.ndim > 2:
	        depth_map = depth_map.squeeze(0)
	    depth_map = depth_map.astype(np.float32)
	    H, W = depth_map.shape

	    # Prepare image: normalise to (H, W, 3)
	    while image.ndim > 3:
	        image = image.squeeze(0)
	    if image.ndim == 3 and image.shape[0] in (1, 3):
	        image = np.transpose(image, (1, 2, 0))
	    if image.ndim == 2:
	        image = np.stack([image] * 3, axis=-1)
	    if image.shape[-1] == 1:
	        # PIL cannot build an image from (H, W, 1); replicate to RGB.
	        image = np.repeat(image, 3, axis=-1)
	    elif image.shape[-1] > 3:
	        # Drop alpha / extra channels so colors and texture are plain RGB.
	        image = image[..., :3]
	    if image.shape[0] != H or image.shape[1] != W:
	        resized = PILImage.fromarray(image).resize((W, H), PILImage.BILINEAR)
	        image = np.array(resized)

	    # Build valid mask
	    valid = (
	        (depth_map > 0.01)
	        & (depth_map < max_depth)
	        & np.isfinite(depth_map)
	    )

	    if padding is not None:
	        pad_left, pad_right, pad_top, pad_bottom = padding
	        if pad_left > 0:
	            valid[:, :pad_left] = False
	        if pad_right > 0:
	            valid[:, W - pad_right:] = False
	        if pad_top > 0:
	            valid[:pad_top, :] = False
	        if pad_bottom > 0:
	            valid[H - pad_bottom:, :] = False

	    if remove_edge:
	        edge = utils3d.np.depth_map_edge(depth_map, rtol=edge_rtol)
	        valid &= ~edge

	    # Unproject to 3D in OpenCV coords (x-right, y-down, z-forward).
	    # The mesh is built in OpenCV space first and the vertices are moved
	    # to glTF coords afterwards.
	    fx, fy = float(intrinsics[0, 0]), float(intrinsics[1, 1])
	    cx, cy = float(intrinsics[0, 2]), float(intrinsics[1, 2])
	    u, v = np.meshgrid(np.arange(W), np.arange(H))
	    x3d = (u - cx) * depth_map / fx
	    y3d = (v - cy) * depth_map / fy
	    z3d = depth_map
	    points_cv = np.stack([x3d, y3d, z3d], axis=-1).astype(np.float32)

	    # UV map in image convention: (0, 0) at the top-left pixel
	    uv = np.stack(
	        [u / max(W - 1, 1), v / max(H - 1, 1)], axis=-1
	    ).astype(np.float32)

	    # Colors normalized [0, 1]
	    colors = image.astype(np.float32) / 255.0

	    # Build triangulated mesh in OpenCV coords
	    faces, vertices, _vertex_colors, vertex_uvs = (
	        utils3d.np.build_mesh_from_map(
	            points_cv, colors, uv, mask=valid, tri=True
	        )
	    )

	    print(
	        f"[Mesh] {vertices.shape[0]} vertices, "
	        f"{faces.shape[0]} faces, "
	        f"valid pixels: {valid.sum()}/{valid.size}"
	    )

	    scene = trimesh.Scene()

	    if len(vertices) > 0:
	        # OpenCV (x, y, z) -> glTF (x, -y, -z). This is a 180-degree
	        # rotation around the x-axis (det=+1), so triangle winding order
	        # is preserved.
	        vertices = vertices * np.array([1.0, -1.0, -1.0], dtype=np.float32)

	        # Pre-flip v to compensate for the v-flip applied on GLB export
	        # (per the original author's note; the UVs above have v=0 at top).
	        vertex_uvs = vertex_uvs.copy()
	        vertex_uvs[:, 1] = 1.0 - vertex_uvs[:, 1]

	        # Textured mesh; process=False keeps trimesh from altering geometry
	        material = trimesh.visual.material.PBRMaterial(
	            baseColorTexture=PILImage.fromarray(image),
	            metallicFactor=0.0,
	            roughnessFactor=1.0,
	        )
	        visuals = trimesh.visual.TextureVisuals(
	            uv=vertex_uvs, material=material
	        )
	        mesh = trimesh.Trimesh(
	            vertices=vertices, faces=faces, visual=visuals,
	            process=False,
	        )
	        scene.add_geometry(mesh, node_name="scene_mesh")

	    # Add wireframe 3D boxes as thin cylinder geometry. This also runs
	    # when the mesh is empty so detections are not silently dropped.
	    all_corners = []
	    for boxes3d in boxes3d_list:
	        if len(boxes3d) > 0:
	            all_corners.extend(boxes3d_to_corners(boxes3d))

	    box_colors = _generate_box_colors(len(all_corners))
	    for i, (corners, color) in enumerate(zip(all_corners, box_colors)):
	        box_mesh = _create_wireframe_box_trimesh(corners, color, radius=0.015)
	        if box_mesh is not None:
	            scene.add_geometry(box_mesh, node_name=f"box_{i}")

	    scene.export(output_path)
	    return output_path