jskvrna commited on
Commit
d2ea57b
·
1 Parent(s): f4eb848

Initial commit

Browse files
Files changed (1) hide show
  1. test.py +401 -0
test.py ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ from hoho2025.vis import plot_all_modalities
3
+ from hoho2025.viz3d import *
4
+ import pycolmap
5
+ import tempfile,zipfile
6
+ import io
7
+ import open3d as o3d
8
+
9
+ from hoho2025.example_solutions import predict_wireframe
10
+
11
def read_colmap_rec(colmap_data):
    """Parse a zipped COLMAP export into a pycolmap.Reconstruction.

    Args:
        colmap_data: Raw bytes of a zip archive holding the COLMAP files
            (cameras, images, points3D in txt/bin form).

    Returns:
        The parsed pycolmap.Reconstruction.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(io.BytesIO(colmap_data), "r") as zf:
            zf.extractall(tmpdir)
        # Parse while the temporary directory still exists; the returned
        # reconstruction is fully loaded into memory by pycolmap.
        return pycolmap.Reconstruction(tmpdir)
18
+
19
+ def _plotly_rgb_to_normalized_o3d_color(color_val) -> list[float]:
20
+ """
21
+ Converts Plotly-style color (str 'rgb(r,g,b)' or tuple (r,g,b))
22
+ to normalized [r/255, g/255, b/255] for Open3D.
23
+ """
24
+ if isinstance(color_val, str):
25
+ if color_val.startswith('rgba'): # e.g. 'rgba(255,0,0,0.5)'
26
+ parts = color_val[5:-1].split(',')
27
+ return [int(p.strip())/255.0 for p in parts[:3]] # Ignore alpha
28
+ elif color_val.startswith('rgb'): # e.g. 'rgb(255,0,0)'
29
+ parts = color_val[4:-1].split(',')
30
+ return [int(p.strip())/255.0 for p in parts]
31
+ else:
32
+ # Basic color names are not directly supported by this helper for Open3D.
33
+ # Plotly might resolve them, but Open3D needs explicit RGB.
34
+ # Consider adding a name-to-RGB mapping if needed.
35
+ raise ValueError(f"Unsupported color string format for Open3D conversion: {color_val}. Expected 'rgb(...)' or 'rgba(...)'.")
36
+ elif isinstance(color_val, (list, tuple)) and len(color_val) == 3:
37
+ # Assuming input tuple/list is in 0-255 range (e.g., from edge_color_mapping)
38
+ return [c/255.0 for c in color_val]
39
+ raise ValueError(f"Unsupported color type for Open3D conversion: {type(color_val)}. Expected string or 3-element tuple/list.")
40
+
41
+
42
def plot_reconstruction_local(
        fig: go.Figure,
        rec: pycolmap.Reconstruction,
        color: str = 'rgb(0, 0, 255)',
        name: Optional[str] = None,
        points: bool = True,
        cameras: bool = True,
        cs: float = 1.0,
        single_color_points=False,
        camera_color='rgba(0, 255, 0, 0.5)',
        crop_outliers: bool = False):
    """Build Open3D geometries for a COLMAP reconstruction.

    Args:
        fig: Legacy Plotly figure argument; unused (kept for API compatibility).
        rec: pycolmap.Reconstruction providing points3D, images and cameras.
        color: Legacy Plotly point color; unused.
        name: Legacy Plotly trace name; unused.
        points: If True, fill the returned point cloud from rec.points3D.
        cameras: If True, build one LineSet frustum pyramid per image.
        cs: Scale factor controlling the frustum size.
        single_color_points: Legacy Plotly flag; unused.
        camera_color: Plotly-style color string for the camera frustums.
        crop_outliers: If True, drop the 2% of points furthest from the origin.

    Returns:
        (pcd, geometries): an o3d.geometry.PointCloud (empty when `points` is
        False or the reconstruction has no 3D points) and a list of
        o3d.geometry.LineSet camera pyramids (empty when `cameras` is False).
    """
    # Gather point positions and 0-255 RGB colors from the reconstruction.
    xyzs = []
    rgbs = []
    for k, p3D in rec.points3D.items():
        xyzs.append(p3D.xyz)
        rgbs.append(p3D.color)

    xyzs = np.array(xyzs)
    rgbs = np.array(rgbs)

    # Optionally crop the furthest 2% of points (robust to sparse outliers).
    if crop_outliers and len(xyzs) > 0:
        distances = np.linalg.norm(xyzs, axis=1)
        # 98th percentile threshold: removes the 2% furthest points.
        threshold = np.percentile(distances, 98)
        mask = distances <= threshold
        xyzs = xyzs[mask]
        rgbs = rgbs[mask]
        print(f"Cropped outliers: removed {np.sum(~mask)} out of {len(mask)} points ({np.sum(~mask)/len(mask)*100:.2f}%)")

    # BUGFIX: pcd was previously only created inside the `if points` branch,
    # so `return pcd, geometries` raised NameError when points=False or the
    # reconstruction had no 3D points. Always return a (possibly empty) cloud.
    pcd = o3d.geometry.PointCloud()
    if points and len(xyzs) > 0:
        pcd.points = o3d.utility.Vector3dVector(xyzs)
        if rgbs.size > 0:
            # Open3D expects colors normalized to [0, 1].
            pcd.colors = o3d.utility.Vector3dVector(rgbs / 255.0)

    geometries = []
    if cameras:
        try:
            # Convert the Plotly-style camera_color string for Open3D.
            cam_color_normalized = _plotly_rgb_to_normalized_o3d_color(camera_color)
        except ValueError as e:
            print(f"Warning: Invalid camera_color '{camera_color}'. Using default green. Error: {e}")
            cam_color_normalized = [0.0, 1.0, 0.0]  # Default to green

        for image_id, image_data in rec.images.items():
            cam = rec.cameras[image_data.camera_id]
            K = cam.calibration_matrix()

            # Skip implausible intrinsics (same focal-length heuristic as before).
            if K[0, 0] > 5000 or K[1, 1] > 5000:
                print(f"Skipping camera for image {image_id} due to large focal length (fx={K[0,0]}, fy={K[1,1]}).")
                continue

            # cam_from_world maps world -> camera; invert to get the pose.
            T_world_cam = image_data.cam_from_world.inverse()
            R_wc = T_world_cam.rotation.matrix()  # camera frame -> world frame
            t_wc = T_world_cam.translation        # camera center (pyramid apex)

            W, H = float(cam.width), float(cam.height)
            if W <= 0 or H <= 0:
                print(f"Skipping camera for image {image_id} due to invalid dimensions (W={W}, H={H}).")
                continue

            # Image-plane corners in pixels: TL, TR, BR, BL.
            img_corners_px = np.array([[0, 0], [W, 0], [W, H], [0, H]], dtype=float)
            img_corners_h = np.hstack([img_corners_px, np.ones((4, 1))])

            try:
                K_inv = np.linalg.inv(K)
            except np.linalg.LinAlgError:
                print(f"Skipping camera for image {image_id} due to non-invertible K matrix.")
                continue

            # Unproject corners onto the z=1 plane in camera coordinates,
            # scale the frustum by cs, then move into world coordinates.
            cam_coords_scaled = (K_inv @ img_corners_h.T).T * cs
            world_coords_base = (R_wc @ cam_coords_scaled.T).T + t_wc

            # Vertex 0 is the apex; vertices 1-4 are the base corners.
            # Open3D bindings want C-contiguous float64 / int32 arrays.
            pyramid_vertices = np.ascontiguousarray(
                np.vstack((t_wc, world_coords_base)), dtype=np.float64)
            lines = np.ascontiguousarray(np.array([
                [0, 1], [0, 2], [0, 3], [0, 4],  # apex to base corners
                [1, 2], [2, 3], [3, 4], [4, 1]   # base rectangle
            ]), dtype=np.int32)

            frustum = o3d.geometry.LineSet()
            frustum.points = o3d.utility.Vector3dVector(pyramid_vertices)
            frustum.lines = o3d.utility.Vector2iVector(lines)
            # FIX: the converted camera color was previously computed but
            # never applied to the LineSet; paint every edge with it.
            frustum.colors = o3d.utility.Vector3dVector(
                np.array([cam_color_normalized] * len(lines), dtype=np.float64))
            geometries.append(frustum)

    return pcd, geometries
185
+
186
def plot_wireframe_local(
        fig: go.Figure,
        vertices: np.ndarray,
        edges: np.ndarray,
        classifications: np.ndarray = None,
        color: str = 'rgb(0, 0, 255)',
        name: Optional[str] = None,
        **kwargs) -> list:
    """Build Open3D geometries for a wireframe.

    Produces a PointCloud for the vertices and a LineSet for the edges.

    Args:
        fig: Plotly figure object (ignored; retained for API compatibility).
        vertices: (N, 3) array of vertex coordinates.
        edges: (M, 2) array of vertex-index pairs.
        classifications: Optional per-edge class ids, resolved to colors via
            EDGE_CLASSES_BY_ID / edge_color_mapping (module-level tables).
        color: Default 'rgb(...)' color for vertices, and for edges when
            classifications are absent or mismatched in length.
        name: Unused in the Open3D context.
        **kwargs: Ignored.

    Returns:
        List of o3d.geometry.Geometry objects; empty when there are no
        vertices. Point size / line width are visualizer render options in
        Open3D and are not encoded here.
    """
    geometries = []

    # Open3D bindings want C-contiguous float64 points and int32 indices;
    # np.asarray avoids copying when the input already qualifies.
    verts = np.asarray(vertices)
    if verts.dtype != np.float64 or not verts.flags['C_CONTIGUOUS']:
        verts = np.ascontiguousarray(verts, dtype=np.float64)

    conns = np.asarray(edges)
    if conns.dtype != np.int32 or not conns.flags['C_CONTIGUOUS']:
        conns = np.ascontiguousarray(conns, dtype=np.int32)

    if verts.size == 0:
        return geometries

    # --- Vertices: a uniformly colored point cloud -------------------------
    try:
        default_rgb = _plotly_rgb_to_normalized_o3d_color(color)
    except ValueError as e:
        print(f"Warning: Could not parse vertex color '{color}'. Using default black. Error: {e}")
        default_rgb = [0.0, 0.0, 0.0]  # fall back to black

    vertex_cloud = o3d.geometry.PointCloud()
    vertex_cloud.points = o3d.utility.Vector3dVector(verts)
    n_verts = len(verts)
    if n_verts > 0:
        vertex_colors = np.array([default_rgb] * n_verts, dtype=np.float64)
        if not vertex_colors.flags['C_CONTIGUOUS']:
            vertex_colors = np.ascontiguousarray(vertex_colors)
        vertex_cloud.colors = o3d.utility.Vector3dVector(vertex_colors)
    geometries.append(vertex_cloud)

    # --- Edges: a LineSet, optionally colored per classification -----------
    if conns.size > 0 and n_verts > 0:
        line_set = o3d.geometry.LineSet()
        line_set.points = o3d.utility.Vector3dVector(verts)
        line_set.lines = o3d.utility.Vector2iVector(conns)

        edge_colors = []
        if classifications is not None and len(classifications) == len(conns):
            for c_idx in classifications:
                try:
                    # Class id -> class name -> 0-255 RGB tuple.
                    rgb255 = edge_color_mapping[EDGE_CLASSES_BY_ID[c_idx]]
                    edge_colors.append(_plotly_rgb_to_normalized_o3d_color(rgb255))
                except KeyError:
                    print(f"Warning: Classification ID {c_idx} or its mapping not found. Using default color.")
                    edge_colors.append(default_rgb)
                except Exception as e:
                    print(f"Warning: Error processing classification color for index {c_idx}. Using default. Error: {e}")
                    edge_colors.append(default_rgb)
        else:
            # No usable classifications: every edge gets the default color.
            edge_colors = [default_rgb for _ in range(len(conns))]

        if edge_colors:
            colors_np = np.array(edge_colors, dtype=np.float64)
            if not colors_np.flags['C_CONTIGUOUS']:
                colors_np = np.ascontiguousarray(colors_np)
            line_set.colors = o3d.utility.Vector3dVector(colors_np)

        geometries.append(line_set)

    return geometries
291
+
292
def plot_bpo_cameras_from_entry_local(fig: go.Figure, entry: dict, idx = None, camera_scale_factor: float = 1.0):
    """Build Open3D LineSet frustum pyramids for the cameras in a dataset entry.

    Args:
        fig: Plotly figure (ignored; retained for API compatibility).
        entry: Dataset record with parallel lists 'K', 'R', 't' holding per-image
            intrinsics and extrinsics.
        idx: If given, only the camera at this index is visualized.
        camera_scale_factor: Frustum size; the base sits at z=camera_scale_factor
            in camera coordinates.

    Returns:
        List of o3d.geometry.LineSet camera pyramids (blue).
    """
    def _invert_rigid(R, t):
        # Invert a 4x4 rigid transform built from R, t (cam2world <-> world2cam).
        M = np.eye(4)
        M[:3, :3] = R
        M[:3, 3] = t.reshape(-1)
        M_inv = np.linalg.inv(M)
        return M_inv[:3, :3], M_inv[:3, 3]

    frustum_color_str = 'rgb(0, 0, 255)'  # fixed blue for every camera
    geometries = []

    for i in range(len(entry['R'])):
        if idx is not None and i != idx:
            continue

        K_matrix = np.array(entry['K'][i])
        # Invert the stored pose so the geometry can be placed in world space.
        R_pose, t_pose = _invert_rigid(np.array(entry['R'][i]), np.array(entry['t'][i]))

        # Image size inferred from the principal point, assuming cx = W/2 and
        # cy = H/2 — TODO confirm this holds for all entries in the dataset.
        W_img, H_img = K_matrix[0, 2] * 2, K_matrix[1, 2] * 2
        if W_img <= 0 or H_img <= 0:
            print(f"Warning: Camera {i} has invalid dimensions (W={W_img}, H={H_img}) based on K. Skipping.")
            continue

        # Image-plane corners in pixels: TL, TR, BR, BL.
        corners_px = np.array([[0, 0], [W_img, 0], [W_img, H_img], [0, H_img]], dtype=float)

        try:
            K_inv = np.linalg.inv(K_matrix)
        except np.linalg.LinAlgError:
            print(f"Warning: K matrix for camera {i} is singular. Skipping this camera.")
            continue

        # Unproject pixel corners onto the z=1 plane in camera coordinates,
        # scale the frustum, then transform the base into world coordinates.
        rays = to_homogeneous(corners_px) @ K_inv.T
        base_world = (rays * camera_scale_factor) @ R_pose.T + t_pose
        apex_world = t_pose.reshape(1, 3)

        # Vertex 0 is the apex; vertices 1-4 are the base corners.
        # Open3D bindings want C-contiguous float64 / int32 arrays.
        pyramid_points = np.ascontiguousarray(np.vstack((apex_world, base_world)), dtype=np.float64)
        pyramid_edges = np.ascontiguousarray(np.array([
            [0, 1], [0, 2], [0, 3], [0, 4],  # apex to base corners
            [1, 2], [2, 3], [3, 4], [4, 1]   # base rectangle (closed loop)
        ], dtype=np.int32))

        pyramid = o3d.geometry.LineSet()
        pyramid.points = o3d.utility.Vector3dVector(pyramid_points)
        pyramid.lines = o3d.utility.Vector2iVector(pyramid_edges)

        try:
            rgb = _plotly_rgb_to_normalized_o3d_color(frustum_color_str)
        except ValueError as e:
            print(f"Warning: Invalid camera color string '{frustum_color_str}' for camera {i}. Using default blue. Error: {e}")
            rgb = [0.0, 0.0, 1.0]  # default to blue
        pyramid.colors = o3d.utility.Vector3dVector(np.array([rgb] * len(pyramid_edges), dtype=np.float64))

        geometries.append(pyramid)

    return geometries
384
+
385
+
386
# --- Demo driver: stream HoHo25k entries and visualize each one ------------
ds = load_dataset("usm3d/hoho25k", streaming=True, trust_remote_code=True)
for entry in ds['train']:
    # Each entry ships its COLMAP reconstruction as a zipped binary blob.
    colmap = read_colmap_rec(entry['colmap_binary'])

    # Baseline wireframe prediction from the example solution.
    pred_vertices, pred_edges = predict_wireframe(entry)

    # Ground-truth point cloud + camera frustums from COLMAP.
    pcd, geometries = plot_reconstruction_local(None, colmap, points=True, cameras=True, crop_outliers=True)
    # Ground-truth wireframe (classified edges) and predicted wireframe (red).
    wireframe = plot_wireframe_local(None, entry['wf_vertices'], entry['wf_edges'], entry['wf_classifications'])
    wireframe2 = plot_wireframe_local(None, pred_vertices, pred_edges, None, color='rgb(255, 0, 0)')
    bpo_cams = plot_bpo_cameras_from_entry_local(None, entry)

    print(len(geometries), len(bpo_cams))

    # Blocks until the visualizer window is closed, once per entry.
    visu_all = [pcd] + geometries + wireframe + bpo_cams + wireframe2
    o3d.visualization.draw_geometries(visu_all, window_name="3D Reconstruction")
+