hoho / visu.py

Preparation of the files for the public release.

33113fd 7 months ago

21.7 kB

	# This file provides utility functions for 3D visualization using Open3D.
	# It includes functions for plotting SfM/COLMAP reconstructions (point clouds and camera frustums),
	# wireframe models (vertices and edges), and camera poses from custom data entries.
	# Helper functions for color conversion from Plotly to Open3D formats and
	# for drawing annotations on images are also included.
	from datasets import load_dataset
	from hoho2025.vis import plot_all_modalities
	from hoho2025.viz3d import *
	import pycolmap
	import tempfile,zipfile
	import io
	import open3d as o3d
	from PIL import Image, ImageDraw

	def _plotly_rgb_to_normalized_o3d_color(color_val) -> list[float]:
	"""
	Converts Plotly-style color (str 'rgb(r,g,b)' or tuple (r,g,b))
	to normalized [r/255, g/255, b/255] for Open3D.
	"""
	if isinstance(color_val, str):
	if color_val.startswith('rgba'): # e.g. 'rgba(255,0,0,0.5)'
	parts = color_val[5:-1].split(',')
	return [int(p.strip())/255.0 for p in parts[:3]] # Ignore alpha
	elif color_val.startswith('rgb'): # e.g. 'rgb(255,0,0)'
	parts = color_val[4:-1].split(',')
	return [int(p.strip())/255.0 for p in parts]
	else:
	# Basic color names are not directly supported by this helper for Open3D.
	# Plotly might resolve them, but Open3D needs explicit RGB.
	# Consider adding a name-to-RGB mapping if needed.
	raise ValueError(f"Unsupported color string format for Open3D conversion: {color_val}. Expected 'rgb(...)' or 'rgba(...)'.")
	elif isinstance(color_val, (list, tuple)) and len(color_val) == 3:
	# Assuming input tuple/list is in 0-255 range (e.g., from edge_color_mapping)
	return [c/255.0 for c in color_val]
	raise ValueError(f"Unsupported color type for Open3D conversion: {type(color_val)}. Expected string or 3-element tuple/list.")

	def draw_crosses_on_image(image_pil, vertices_data, output_file_path, size=5, color=(0, 0, 0)):
	"""
	Draws crosses on a PIL Image at specified vertex locations and saves it.
	Args:
	image_pil (PIL.Image.Image): The image to draw on.
	vertices_data (list): List of dictionaries, each with an 'xy' key
	holding [x, y] coordinates.
	output_file_path (str): Path to save the modified image.
	size (int): Size of the cross arms.
	color (tuple): RGB color for the cross.
	"""
	# Work on a copy to avoid modifying the original image
	img_to_draw_on = image_pil.copy()
	drawer = ImageDraw.Draw(img_to_draw_on)

	for vert_info in vertices_data:
	if 'xy' in vert_info:
	x, y = vert_info['xy']
	# Ensure coordinates are integers for drawing
	x_int, y_int = int(round(x)), int(round(y))

	# Draw horizontal line
	drawer.line([(x_int - size, y_int), (x_int + size, y_int)], fill=color, width=1)
	# Draw vertical line
	drawer.line([(x_int, y_int - size), (x_int, y_int + size)], fill=color, width=1)

	img_to_draw_on.save(output_file_path)

	def save_gestalt_with_proj(gest_seg_np, gt_verts, img_id):
	# Convert gest_seg_np (which is a numpy array) to a PIL Image
	# Assuming gest_seg_np is a 2D grayscale or a 3-channel RGB image
	if gest_seg_np.ndim == 2:
	img_to_draw_on = Image.fromarray(gest_seg_np, mode='L')
	elif gest_seg_np.ndim == 3 and gest_seg_np.shape[2] == 3:
	img_to_draw_on = Image.fromarray(gest_seg_np, mode='RGB')
	else:
	# Fallback or error handling if the format is unexpected
	# For simplicity, let's assume it can be converted directly or handle specific cases
	img_to_draw_on = Image.fromarray(gest_seg_np.astype(np.uint8))

	# Ensure the image is in a mode that allows color drawing (e.g., RGB)
	if img_to_draw_on.mode == 'L':
	img_to_draw_on = img_to_draw_on.convert('RGB')

	draw = ImageDraw.Draw(img_to_draw_on)
	cross_size = 5 # Size of the cross arms
	cross_color = (0, 0, 0) # Red color for the cross

	for vert_dict in gt_verts:
	x, y = vert_dict['xy']
	# Draw horizontal line of the cross
	draw.line([(x - cross_size, y), (x + cross_size, y)], fill=cross_color, width=1)
	# Draw vertical line of the cross
	draw.line([(x, y - cross_size), (x, y + cross_size)], fill=cross_color, width=1)

	# Save the image with drawn crosses
	# You might want to use a different filename or path
	img_to_draw_on.save(f'gestalt_cross/{img_id}.png')

	def plot_reconstruction_local(
	fig: go.Figure,
	rec: pycolmap.Reconstruction, # Added type hint
	color: str = 'rgb(0, 0, 255)',
	name: Optional[str] = None,
	points: bool = True,
	cameras: bool = True,
	cs: float = 1.0,
	single_color_points=False,
	camera_color='rgba(0, 255, 0, 0.5)',
	crop_outliers: bool = False):
	# rec is a pycolmap.Reconstruction object
	# Filter outliers
	xyzs = []
	rgbs = []
	# Iterate over rec.points3D
	for k, p3D in rec.points3D.items():
	#print (p3D)
	xyzs.append(p3D.xyz)
	rgbs.append(p3D.color)

	xyzs = np.array(xyzs)
	rgbs = np.array(rgbs)

	# Crop outliers if requested
	if crop_outliers and len(xyzs) > 0:
	# Calculate distances from origin
	distances = np.linalg.norm(xyzs, axis=1)
	# Find threshold at 98th percentile (removing 2% furthest points)
	threshold = np.percentile(distances, 98)
	# Filter points
	mask = distances <= threshold
	xyzs = xyzs[mask]
	rgbs = rgbs[mask]
	print(f"Cropped outliers: removed {np.sum(~mask)} out of {len(mask)} points ({np.sum(~mask)/len(mask)*100:.2f}%)")

	if points and len(xyzs) > 0:
	pcd = o3d.geometry.PointCloud()
	pcd.points = o3d.utility.Vector3dVector(xyzs)

	# Normalize RGB colors from [0, 255] to [0, 1] for Open3D
	# and ensure rgbs is not empty before normalization
	if rgbs.size > 0:
	normalized_rgbs = rgbs / 255.0
	pcd.colors = o3d.utility.Vector3dVector(normalized_rgbs)

	# Original Plotly plot_points call is replaced by Open3D visualization:
	# plot_points(fig, xyzs, color=color if single_color_points else rgbs, ps=1, name=name)

	# This code assumes it's placed within the plot_reconstruction_local function,
	# after point cloud processing, and that a list `geometries` (List[o3d.geometry.Geometry])
	# is defined in the function's scope to collect all geometries.
	# It uses arguments `cameras`, `rec`, `cs`, `camera_color` from the function signature.
	# The helper `_plotly_rgb_to_normalized_o3d_color` is assumed to be available.

	if cameras: # Check if camera visualization is enabled
	try:
	# Convert Plotly-style camera_color string to normalized RGB for Open3D
	cam_color_normalized = _plotly_rgb_to_normalized_o3d_color(camera_color)
	except ValueError as e:
	print(f"Warning: Invalid camera_color '{camera_color}'. Using default green. Error: {e}")
	cam_color_normalized = [0.0, 1.0, 0.0] # Default to green

	geometries = []

	for image_id, image_data in rec.images.items():
	# Get camera object and its intrinsic matrix K
	cam = rec.cameras[image_data.camera_id]
	K = cam.calibration_matrix()

	# Validate intrinsics (e.g., focal length check from original code)
	if K[0, 0] > 5000 or K[1, 1] > 5000:
	print(f"Skipping camera for image {image_id} due to large focal length (fx={K[0,0]}, fy={K[1,1]}).")
	continue

	# Get camera pose (T_world_cam = T_cam_world.inverse())
	# image_data.cam_from_world is T_cam_world (camera coordinates from world coordinates)
	T_world_cam = image_data.cam_from_world.inverse()
	R_wc = T_world_cam.rotation.matrix() # Rotation: camera frame to world frame
	t_wc = T_world_cam.translation # Origin of camera frame in world coordinates (pyramid apex)

	W, H = float(cam.width), float(cam.height)

	# Skip if camera dimensions are invalid
	if W <= 0 or H <= 0:
	print(f"Skipping camera for image {image_id} due to invalid dimensions (W={W}, H={H}).")
	continue

	# Define image plane corners in pixel coordinates (top-left, top-right, bottom-right, bottom-left)
	img_corners_px = np.array([
	[0, 0], [W, 0], [W, H], [0, H]
	], dtype=float)

	# Convert pixel corners to homogeneous coordinates
	img_corners_h = np.hstack([img_corners_px, np.ones((4, 1))])

	try:
	K_inv = np.linalg.inv(K)
	except np.linalg.LinAlgError:
	print(f"Skipping camera for image {image_id} due to non-invertible K matrix.")
	continue

	# Unproject pixel corners to normalized camera coordinates (points on z=1 plane in camera frame)
	cam_coords_norm = (K_inv @ img_corners_h.T).T

	# Scale these points by 'cs' (camera scale factor, determines frustum size)
	# These points are ( (x/z)cs, (y/z)cs, cs ) in the camera's coordinate system.
	cam_coords_scaled = cam_coords_norm * cs

	# Transform scaled base corners from camera coordinates to world coordinates
	world_coords_base = (R_wc @ cam_coords_scaled.T).T + t_wc

	# Define vertices for the camera pyramid visualization
	# Vertex 0 is the apex (camera center), vertices 1-4 are the base corners
	pyramid_vertices = np.vstack((t_wc, world_coords_base))
	if not pyramid_vertices.flags['C_CONTIGUOUS']:
	pyramid_vertices = np.ascontiguousarray(pyramid_vertices, dtype=np.float64)
	elif pyramid_vertices.dtype != np.float64:
	pyramid_vertices = np.asarray(pyramid_vertices, dtype=np.float64)

	# Define lines for the pyramid: 4 from apex to base, 4 for the base rectangle
	lines = np.array([
	[0, 1], [0, 2], [0, 3], [0, 4], # Apex to base corners
	[1, 2], [2, 3], [3, 4], [4, 1] # Base rectangle
	])

	if not lines.flags['C_CONTIGUOUS']:
	lines = np.ascontiguousarray(lines, dtype=np.int32)
	elif lines.dtype != np.int32:
	lines = np.asarray(lines, dtype=np.int32)

	# Create Open3D LineSet object for the camera pyramid
	camera_pyramid_lineset = o3d.geometry.LineSet()
	camera_pyramid_lineset.points = o3d.utility.Vector3dVector(pyramid_vertices)
	camera_pyramid_lineset.lines = o3d.utility.Vector2iVector(lines)

	# Add the camera visualization to the list of geometries to be rendered
	geometries.append(camera_pyramid_lineset)

	else:
	geometries = []

	return pcd, geometries

	def plot_wireframe_local(
	fig: go.Figure, # This argument is no longer used for plotting by this function.
	vertices: np.ndarray,
	edges: np.ndarray,
	classifications: np.ndarray = None,
	color: str = 'rgb(0, 0, 255)', # Default color for vertices and unclassified/default edges.
	name: Optional[str] = None, # No direct equivalent for Open3D geometry list's name/legend.
	**kwargs) -> list: # Returns a list of o3d.geometry.Geometry objects.
	"""
	Generates Open3D geometries for a wireframe: a PointCloud for vertices
	and a LineSet for edges.

	Args:
	fig: Plotly figure object (no longer used for plotting by this function).
	vertices: Numpy array of vertex coordinates (N, 3).
	edges: Numpy array of edge connections (M, 2), indices into vertices.
	classifications: Optional numpy array of classifications for edges.
	color: Default color string (e.g., 'rgb(0,0,255)') for vertices and
	for edges if classifications are not provided or don't match.
	name: Optional name (unused in Open3D context here).
	**kwargs: Additional keyword arguments (unused).

	Returns:
	A list of Open3D geometry objects. Empty if no vertices.
	Note: Plotly-specific 'ps' (point size) and line width are not
	directly translated. These are typically visualizer render options in Open3D.
	"""
	open3d_geometries = []

	# Ensure gt_vertices is numpy array, C-contiguous, and float64
	# np.asarray avoids a copy if 'vertices' is already a suitable ndarray.
	gt_vertices = np.asarray(vertices)
	if not gt_vertices.flags['C_CONTIGUOUS'] or gt_vertices.dtype != np.float64:
	gt_vertices = np.ascontiguousarray(gt_vertices, dtype=np.float64)

	# Ensure gt_connections is numpy array, C-contiguous, and int32
	gt_connections = np.asarray(edges)
	if not gt_connections.flags['C_CONTIGUOUS'] or gt_connections.dtype != np.int32:
	gt_connections = np.ascontiguousarray(gt_connections, dtype=np.int32)

	if gt_vertices.size == 0:
	return open3d_geometries

	# 1. Create PointCloud for Vertices
	try:
	vertex_rgb_normalized = _plotly_rgb_to_normalized_o3d_color(color)
	except ValueError as e:
	print(f"Warning: Could not parse vertex color '{color}'. Using default black. Error: {e}")
	vertex_rgb_normalized = [0.0, 0.0, 0.0] # Default to black on error

	vertex_pcd = o3d.geometry.PointCloud()
	# gt_vertices is now C-contiguous and float64
	vertex_pcd.points = o3d.utility.Vector3dVector(gt_vertices)

	num_vertices = len(gt_vertices)
	if num_vertices > 0:
	# Create vertex_colors_np with correct dtype
	vertex_colors_np = np.array([vertex_rgb_normalized] * num_vertices, dtype=np.float64)
	# Ensure C-contiguity (dtype is already float64 from np.array construction)
	# This check is a safeguard, as np.array from a list of lists with specified dtype is usually contiguous.
	if not vertex_colors_np.flags['C_CONTIGUOUS']:
	vertex_colors_np = np.ascontiguousarray(vertex_colors_np) # Preserves dtype
	vertex_pcd.colors = o3d.utility.Vector3dVector(vertex_colors_np)
	open3d_geometries.append(vertex_pcd)

	# 2. Create LineSet for Edges
	if gt_connections.size > 0 and num_vertices > 0: # Edges need vertices
	line_set = o3d.geometry.LineSet()
	# gt_vertices is already C-contiguous and float64
	line_set.points = o3d.utility.Vector3dVector(gt_vertices)
	# gt_connections is already C-contiguous and int32
	line_set.lines = o3d.utility.Vector2iVector(gt_connections)

	line_colors_list_normalized = []
	if classifications is not None and len(classifications) == len(gt_connections):
	# Assuming EDGE_CLASSES_BY_ID and edge_color_mapping are defined in the global scope
	# or imported, as implied by the original code structure.
	for c_idx in classifications:
	try:
	color_tuple_255 = edge_color_mapping[EDGE_CLASSES_BY_ID[c_idx]]
	line_colors_list_normalized.append(_plotly_rgb_to_normalized_o3d_color(color_tuple_255))
	except KeyError: # Handle cases where classification ID or mapping is not found
	print(f"Warning: Classification ID {c_idx} or its mapping not found. Using default color.")
	line_colors_list_normalized.append(vertex_rgb_normalized) # Fallback to default vertex color
	except Exception as e:
	print(f"Warning: Error processing classification color for index {c_idx}. Using default. Error: {e}")
	line_colors_list_normalized.append(vertex_rgb_normalized) # Fallback
	else:
	# Use the default 'color' for all lines if no classifications or mismatch
	default_line_rgb_normalized = vertex_rgb_normalized # Same as vertex color by default
	for _ in range(len(gt_connections)):
	line_colors_list_normalized.append(default_line_rgb_normalized)

	if line_colors_list_normalized: # Check if list is not empty
	# Create line_colors_np with correct dtype
	line_colors_np = np.array(line_colors_list_normalized, dtype=np.float64)
	# Ensure C-contiguity (dtype is already float64)
	# Safeguard, similar to vertex_colors_np.
	if not line_colors_np.flags['C_CONTIGUOUS']:
	line_colors_np = np.ascontiguousarray(line_colors_np) # Preserves dtype
	line_set.colors = o3d.utility.Vector3dVector(line_colors_np)

	open3d_geometries.append(line_set)

	return open3d_geometries

	def plot_bpo_cameras_from_entry_local(fig: go.Figure, entry: dict, idx = None, camera_scale_factor: float = 1.0):
	def cam2world_to_world2cam(R, t):
	rt = np.eye(4)
	rt[:3,:3] = R
	rt[:3,3] = t.reshape(-1)
	rt = np.linalg.inv(rt)
	return rt[:3,:3], rt[:3,3]
	geometries = []
	for i in range(len(entry['R'])):
	if idx is not None and i != idx:
	continue

	# Parameters for this camera visualization
	# current_cam_size = 1.0 # Original 'size = 1.' - Replaced by camera_scale_factor
	current_cam_color_str = 'rgb(0, 0, 255)' # Original 'color = 'rgb(0, 0, 255)''

	# Load camera parameters from entry
	K_matrix = np.array(entry['K'][i])
	R_orig = np.array(entry['R'][i])
	t_orig = np.array(entry['t'][i])

	# Apply cam2world_to_world2cam transformation as in original snippet
	# This R_transformed, t_transformed will be used to place the camera geometry
	R_transformed, t_transformed = cam2world_to_world2cam(R_orig, t_orig)

	# Image dimensions from K matrix (cx, cy are K[0,2], K[1,2])
	# Ensure W_img and H_img are derived correctly. Assuming K[0,2] and K[1,2] are principal points cx, cy.
	# If K is [fx, 0, cx; 0, fy, cy; 0, 0, 1], then W_img and H_img might need to come from elsewhere
	# or be estimated if not directly available. The original code used K[0,2]2, K[1,2]2.
	# This implies cx = W/2, cy = H/2.
	W_img, H_img = K_matrix[0, 2] * 2, K_matrix[1, 2] * 2
	if W_img <= 0 or H_img <= 0:
	# Attempt to get W, H from cam.width, cam.height if available in entry, like in colmap
	# This part depends on the structure of 'entry'. For now, stick to original logic.
	print(f"Warning: Camera {i} has invalid dimensions (W={W_img}, H={H_img}) based on K. Skipping.")
	continue

	# Define image plane corners in pixel coordinates (top-left, top-right, bottom-right, bottom-left)
	corners_px = np.array([[0, 0], [W_img, 0], [W_img, H_img], [0, H_img]], dtype=float)

	# Removed scale_val, image_extent, world_extent calculations.
	# The scaling is now directly controlled by camera_scale_factor.

	try:
	K_inv = np.linalg.inv(K_matrix)
	except np.linalg.LinAlgError:
	print(f"Warning: K matrix for camera {i} is singular. Skipping this camera.")
	continue

	# Unproject pixel corners to homogeneous camera coordinates.
	# Assuming to_homogeneous converts (N,2) pixel coords to (N,3) homogeneous coords [px, py, 1].
	# These points are on the z=1 plane in camera coordinates.
	corners_cam_homog = to_homogeneous(corners_px) @ K_inv.T

	# Scale these points by camera_scale_factor.
	# This makes the frustum base at z=camera_scale_factor in camera coordinates.
	scaled_cam_points = corners_cam_homog * camera_scale_factor

	# Transform scaled camera points to world coordinates using R_transformed, t_transformed
	world_coords_base = scaled_cam_points @ R_transformed.T + t_transformed

	# Apex of the pyramid is t_transformed
	apex_world = t_transformed.reshape(1, 3)

	# Vertices for Open3D LineSet: apex (vertex 0) + 4 base corners (vertices 1-4)
	pyramid_vertices_np = np.vstack((apex_world, world_coords_base))
	if not pyramid_vertices_np.flags['C_CONTIGUOUS'] or pyramid_vertices_np.dtype != np.float64:
	pyramid_vertices_np = np.ascontiguousarray(pyramid_vertices_np, dtype=np.float64)

	# Lines for the pyramid: 4 from apex to base, 4 for the base rectangle
	lines_np = np.array([
	[0, 1], [0, 2], [0, 3], [0, 4], # Apex to base corners
	[1, 2], [2, 3], [3, 4], [4, 1] # Base rectangle (closed loop)
	], dtype=np.int32)

	# Create Open3D LineSet object for the camera pyramid
	camera_lineset = o3d.geometry.LineSet()
	camera_lineset.points = o3d.utility.Vector3dVector(pyramid_vertices_np)
	lines_np = np.ascontiguousarray(lines_np, dtype=np.int32)
	camera_lineset.lines = o3d.utility.Vector2iVector(lines_np)

	# Color the LineSet
	try:
	o3d_color = _plotly_rgb_to_normalized_o3d_color(current_cam_color_str)
	except ValueError as e:
	print(f"Warning: Invalid camera color string '{current_cam_color_str}' for camera {i}. Using default blue. Error: {e}")
	o3d_color = [0.0, 0.0, 1.0] # Default to blue
	camera_lineset.colors = o3d.utility.Vector3dVector(np.array([o3d_color] * len(lines_np), dtype=np.float64))

	geometries.append(camera_lineset)

	return geometries