jskvrna
/

hoho

English

hoho25k

s23dr2025

Model card Files Files and versions

xet

Community

jskvrna committed on May 29, 2025

Commit

1d64568

1 Parent(s): 9c5e3ff

xx

Browse files

Files changed (2) hide show

predict.py +350 -193
train.py +20 -10

predict.py CHANGED Viewed

@@ -16,9 +16,11 @@ from fast_pointnet import save_patches_dataset, predict_vertex_from_patch
 #import time
 from fast_pointnet_class import save_patches_dataset as save_patches_dataset_class
 from fast_pointnet_class import predict_class_from_patch
 from scipy.spatial.distance import cdist
 from scipy.optimize import linear_sum_assignment
 import torch
 GENERATE_DATASET = False
 DATASET_DIR = '/home/skvrnjan/personal/hohocustom/'
@@ -1179,6 +1181,114 @@ def generate_edge_patches_forward(frame, pred_vertices):
     return forward_patches
 def calculate_cylinder_overlap_volume(cyl1, cyl2):
     """
     Calculate the intersection volume between two cylinders using numpy vectorization.
@@ -1280,119 +1390,232 @@ def calculate_cylinder_overlap_volume(cyl1, cyl2):
     return max(0.0, overlap_volume)
 def create_pcloud(colmap_rec, frame):
-    all_imgs_ids = []
-    all_imgs = []
-    all_imgs_K = []
-    all_imgs_R = []
-    all_imgs_t = []
-    all_imgs_ade = []
-    all_imgs_gestalt = []
-    for img_id_c, col_img_obj in colmap_rec.images.items(): # Renamed col_img to col_img_obj to avoid conflict
-        all_imgs_ids.append(col_img_obj.name)
-        all_imgs.append(col_img_obj)
-    for i, (K, R, t, img_id, ade, gestalt, depth) in enumerate(zip(frame['K'], frame['R'], frame['t'], frame['image_ids'], frame['ade'], frame['gestalt'], frame['depth'])):
-        for all_imgsid in all_imgs_ids:
-            if all_imgsid == img_id:
-                all_imgs_K.append(np.array(K))
-                all_imgs_R.append(np.array(R))
-                all_imgs_t.append(np.array(t))
-                ade_mask = get_house_mask(ade)
-                all_imgs_ade.append(np.array(ade_mask))
-                depth_size = (np.array(depth).shape[1], np.array(depth).shape[0]) # W, H
-                gest_seg = gestalt.resize(depth_size)
-                gest_seg_np = np.array(gest_seg).astype(np.uint8)
-                all_imgs_gestalt.append(np.array(gest_seg_np))
-    # 2) Gather 3D points that this image sees (according to COLMAP)
-    points_xyz_world = []
-    points_colors = []
-    points_idxs = []
-    points_imgs = []
-    points_uv = []
-    points_ade = []
-    points_gestalt = []
-    for pid, p3D in colmap_rec.points3D.items():
-        found = False
-        found_in_ids = []
-        uv_projections = []
-        in_ade = False
-        gest = []
-        for idx, img in enumerate(all_imgs):
-            if img.has_point3D(pid):
-                found = True
-                found_in_ids.append(img.name)
-                # Project the 3D point to image coordinates using K, R, t
-                R = all_imgs_R[idx]
-                t = all_imgs_t[idx]
-                K = all_imgs_K[idx]
-                xyz_homogeneous = np.append(p3D.xyz, 1.0)
-                world_to_cam_mat = np.hstack([R, t.reshape(3, 1)])
-                cam_coords = world_to_cam_mat @ xyz_homogeneous
-                if cam_coords[2] > 0:  # Point is in front of camera
-                    pixel_coords = np.dot(K, cam_coords)
-                    u = pixel_coords[0] / pixel_coords[2]
-                    v = pixel_coords[1] / pixel_coords[2]
-                    u = round(u)
-                    v = round(v)
-                    uv_projections.append((u, v))
-                    # Check if point is inside ADE segmentation (house mask)
-                    if 0 <= u < all_imgs_ade[idx].shape[1] and 0 <= v < all_imgs_ade[idx].shape[0]:
-                        in_ade = all_imgs_ade[idx][v, u]  # Point is inside house mask
-                    else:
-                        in_ade = False  # Default to False if out of bounds
-                    # Check gestalt segmentation value at this point
-                    if 0 <= u < all_imgs_gestalt[idx].shape[1] and 0 <= v < all_imgs_gestalt[idx].shape[0]:
-                        gestalt_value = all_imgs_gestalt[idx][v, u]
-                        gest.append(gestalt_value)
-                    else:
-                        gest.append(np.array([0,0,0]))  # Default value for out-of-bounds
-        if found:
-            points_xyz_world.append(p3D.xyz)  # world coords
-            points_colors.append(p3D.color / 255.0)  # normalize to [0,1]
-            points_idxs.append(pid)
-            points_imgs.append(found_in_ids)
-            points_uv.append(uv_projections)
-            points_ade.append(in_ade)
-            points_gestalt.append(gest)
-    points_xyz_world = np.array(points_xyz_world) if points_xyz_world else np.empty((0, 3))
-    points_colors = np.array(points_colors) if points_colors else np.empty((0, 3))
-    points_idxs = np.array(points_idxs) if points_idxs else np.empty((0,))
-    points_ade = np.array(points_ade) if points_ade else np.empty((0,))
-    # Create 7D point cloud from COLMAP data (xyz + rgb + img_count)
     if points_xyz_world.shape[0] > 0:
-        colmap_points_7d = np.zeros((len(points_xyz_world), 7))
-        colmap_points_7d[:, :3] = points_xyz_world  # xyz coordinates
-        colmap_points_7d[:, 3:6] = points_colors    # rgb colors
-        colmap_points_7d[:, 6] = points_idxs
-        whole_pcloud = {'points_7d': colmap_points_7d,
-                        'imgs': points_imgs,
-                        'uv': points_uv,
-                        'all_imgs_ids': all_imgs_ids,
-                        'all_imgs_K': all_imgs_K,
-                        'all_imgs_R': all_imgs_R,
-                        'all_imgs_t': all_imgs_t,
-                        'ade': points_ade,
-                        'gestalt': points_gestalt}
     else:
-        whole_pcloud = {'points_7d': np.empty((0, 7)),
-                        'ids': np.empty((0,)),
-                        'imgs': [],
-                        'uv': []}
     return whole_pcloud
 def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config) -> Tuple[np.ndarray, List[int]]:
@@ -1401,11 +1624,19 @@ def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config)
     """
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     good_entry = convert_entry_to_human_readable(entry)
     colmap_rec = good_entry['colmap_binary']
-    colmap_pcloud = create_pcloud(colmap_rec, good_entry)
     vertex_threshold = config.get('vertex_threshold', 0.5)
     edge_threshold = config.get('edge_threshold', 0.5)
@@ -1415,8 +1646,6 @@ def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config)
     idxs_points = []
     all_connections = []
-    print(f"Processing {len(good_entry['gestalt'])} images")
     for i, (gest, depth, K, R, t, img_id, ade_seg) in enumerate(zip(good_entry['gestalt'],
                                                 good_entry['depth'],
                                                 good_entry['K'],
@@ -1425,6 +1654,7 @@ def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config)
                                                 good_entry['image_ids'],
                                                 good_entry['ade'] # Added ade20k segmentation
                                                 )):
         # Visualize gestalt segmentation
         K = np.array(K)
         R = np.array(R)
@@ -1436,107 +1666,35 @@ def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config)
         gest_seg_np = np.array(gest_seg).astype(np.uint8)
         vertices_ours, connections_ours, vertices_3d_ours, patches, filtered_point_idxs = our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id, ade_seg, depth, K=K, R=R, t=t, frame=good_entry)
         idxs_points.append(filtered_point_idxs)
         all_connections.append(connections_ours)
-        '''
-        if GENERATE_DATASET:
-            save_patches_dataset(patches, DATASET_DIR, img_id)
-            continue
-        '''
-        #for idx, patch in enumerate(patches):
-            #pred_vertex, pred_dist, pred_class = predict_vertex_from_patch(pnet_model, patch, device=device)
-            #vertices_3d_ours[idx] = pred_vertex
-            #visu_patch_and_pred(patch, pred_vertex, pred_dist, pred_class)
-        #    x = 0
         vertices, connections, vertices_3d = vertices_ours, connections_ours, vertices_3d_ours
-        # Get 2D vertices and edges first
-        #vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.)
-        #gt_verts = []
-        #gt_verts, gt_connects, gt_verts3d = get_gt_vertices_and_edges(good_entry, i, depth, colmap_rec, K, R, t, img_id, ade_seg)
-        #vertices, connections = gt_verts, gt_connects
-        if False:
-            gest.save(f'gestalt/{img_id}.png')
-            # Save ADE20k segmentation
-            # ade_seg is already a PIL Image
-            try:
-                ade_seg.save(f'ade_segmentations/{img_id}_ade.png')
-            except Exception as e:
-                print(f"Could not save ADE segmentation for {img_id}: {e}")
-            save_gestalt_with_proj(gest_seg_np, gt_verts, img_id)
-            # Define a local helper function to draw crosses and save the image
-            # Draw crosses on the ADE segmentation image and save it
-            # 'vertices' here refers to gt_verts
-            draw_crosses_on_image(ade_seg, vertices, f'crosses_{img_id}.png', color=(0, 0, 0), size=5)
-        # Check if we have enough to proceed
-        if (len(vertices) < 2) or (len(connections) < 1) and False:
-            print(f'Not enough vertices or connections found in image {i}, skipping.')
-            vert_edge_per_image[i] = [], [], np.empty((0, 3))
-            continue
-        # Call the refactored function to get 3D points
-        #vertices_3d = create_3d_wireframe_single_image(vertices, connections, depth, colmap_rec, img_id, ade_seg, K, R, t)
-        #vertices_3d = gt_verts3d
-        # Store original 2D vertices, connections, and computed 3D points
-        #connections = []
-        if False:
-            pcd, geometries = plot_reconstruction_local(None, colmap_rec, points=True, cameras=True, crop_outliers=True)
-            wireframe = plot_wireframe_local(None, good_entry['wf_vertices'], good_entry['wf_edges'], good_entry['wf_classifications'])
-            wireframe2 = plot_wireframe_local(None, vertices_3d_ours, connections_ours, None, color='rgb(255, 0, 0)')
-            wireframe3 = plot_wireframe_local(None, vertices_3d, connections, None, color='rgb(0, 0, 255)')
-            bpo_cams = plot_bpo_cameras_from_entry_local(None, good_entry)
-            visu_all = [pcd] + geometries + wireframe + bpo_cams + wireframe2 + wireframe3
-        #o3d.visualization.draw_geometries(visu_all, window_name="3D Reconstruction")
         vert_edge_per_image[i] = vertices, connections, vertices_3d
     extracted_points, extracted_colors, extracted_ids, whole_pcloud, connections = extract_vertices_from_whole_pcloud(colmap_rec, idxs_points, all_connections)
-    patches = generate_patches_v2(extracted_points, extracted_colors, extracted_ids, whole_pcloud, good_entry['wf_vertices'])
-    # Predict vertices from patches using the neural network
     predicted_vertices = []
-    for patch in patches:
         pred_vertex, pred_dist, pred_class = predict_vertex_from_patch(pnet_model, patch, device=device)
-        #visu_patch_and_pred(patch, pred_vertex, pred_dist, pred_class)
         if pred_class > vertex_threshold:
             predicted_vertices.append(pred_vertex)
         else:
             predicted_vertices.append(np.array([0.0, 0.0, 0.0]))  # Append a zero vertex if not predicted
-        #pred_vertex_voxel, pred_dist_voxel, pred_class_voxel = predict_vertex_from_patch_voxel(voxel_model, patch, device=device)
-        #visu_patch_and_pred(patch, pred_vertex_voxel, pred_dist_voxel, pred_class_voxel)
     predicted_vertices = np.array(predicted_vertices) if predicted_vertices else np.empty((0, 3))
-    #visu_pcloud_and_preds(colmap_rec, extracted_ids, extracted_points, extracted_colors, predicted_vertices, connections)
     if GENERATE_DATASET:
         save_patches_dataset(patches, DATASET_DIR, img_id)
         return empty_solution()
-    # Merge vertices from all images
-    #all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 0.1)
-    #all_3d_vertices_clean, connections_3d_clean = all_3d_vertices, connections_3d
-    #all_3d_vertices_clean, connections_3d_clean  = prune_not_connected(all_3d_vertices, connections_3d, keep_largest=False)
-    #all_3d_vertices_clean, connections_3d_clean  = prune_too_far(all_3d_vertices_clean, connections_3d_clean, colmap_rec, th = 1.5)
-    #if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1 and False:
-    #    print (f'Not enough vertices or connections in the 3D vertices')
-    #    return empty_solution()
     # Filter out zero vertices and update connections accordingly
     non_zero_mask = ~np.all(np.isclose(predicted_vertices, [0.0, 0.0, 0.0]), axis=1)
     valid_indices = np.where(non_zero_mask)[0]
@@ -1544,11 +1702,9 @@ def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config)
     # Filter vertices to only include non-zero ones
     filtered_vertices = predicted_vertices[valid_indices]
-    #patches = generate_edge_patches(good_entry, filtered_vertices, colmap_pcloud)
     if GENERATE_DATASET_EDGES:
         patches = generate_edge_patches(good_entry, filtered_vertices, colmap_pcloud)
         save_patches_dataset_class(patches, EDGES_DATASET_DIR, good_entry['order_id'])
         return empty_solution()
     if len(valid_indices) == 0:
@@ -1566,17 +1722,18 @@ def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config)
             new_end = old_to_new_mapping[end_idx]
             if new_start != new_end:  # Ensure we don't connect a vertex to itself
                 filtered_connections.append((new_start, new_end))
-    #print(f"Filtered vertices: {len(filtered_vertices)} from {len(predicted_vertices)}")
-    #print(f"Filtered connections: {len(filtered_connections)} from {len(connections)}")
     forward_patches = generate_edge_patches_forward(good_entry, filtered_vertices)
     new_connections = []
     if len(forward_patches) > 0:
-        for patch in forward_patches:
             start_idx, end_idx = patch['connection']
             pred_class, pred_score = predict_class_from_patch(pnet_class_model, patch, device=device)
             if pred_score > edge_threshold:
                 new_connections.append((start_idx, end_idx))

 #import time
 from fast_pointnet_class import save_patches_dataset as save_patches_dataset_class
 from fast_pointnet_class import predict_class_from_patch
+from fast_pointnet_class_10d import predict_class_from_patch as predict_class_from_patch_10d
 from scipy.spatial.distance import cdist
 from scipy.optimize import linear_sum_assignment
 import torch
+import time
 GENERATE_DATASET = False
 DATASET_DIR = '/home/skvrnjan/personal/hohocustom/'
     return forward_patches
def generate_edge_patches_forward_10d(frame, pred_vertices, colmap_pcloud,
                                      cylinder_radius=0.5, extension_length=0.25,
                                      min_points=10):
    """Build unlabeled 10D edge patches for every pair of predicted vertices.

    For each vertex pair (i, j) with i < j, the points of the cloud that lie
    inside a cylinder around the segment between the two vertices are
    collected, shifted so that the segment midpoint is the origin, and
    returned as one candidate patch.

    Args:
        frame: Unused; kept so the signature stays parallel to the other
            patch generators.
        pred_vertices: Sequence of 3D vertex positions.
        colmap_pcloud: Dict with the keys read here:
            'points_7d' (N x 7 array, columns 0:3 used as xyz and 3:6 as
            colour), 'ade' (N boolean-like flags) and 'gestalt' (per point,
            a list of 3-channel values). The dict is not modified.
        cylinder_radius: Radius of the cylinder around the segment.
        extension_length: How far the cylinder is extended beyond each of
            the two vertices along the segment direction.
        min_points: A pair is skipped unless strictly more than this many
            points fall inside its cylinder.

    Returns:
        List of dicts with keys 'patch_10d' (M x 10: centred xyz, colour,
        ade, gestalt), 'connection' ((i, j) vertex indices), 'line_start',
        'line_end' (segment end points relative to the midpoint),
        'cylinder_radius', 'point_indices' (row indices into the cloud) and
        'center' (segment midpoint).
    """
    vertices = np.asarray(pred_vertices, dtype=float)

    # Work on a copy: slicing 'points_7d' only yields a view, and rescaling
    # the colours through it would alter the caller's cloud on every call.
    points = np.array(colmap_pcloud['points_7d'], dtype=float)
    colmap_points_3d = points[:, :3]
    colors = points[:, 3:6] * 2 - 1  # same affine map as used for gestalt below

    ade = np.where(np.asarray(colmap_pcloud['ade']), 1, -1)  # flag -> +1 / -1

    # Fuse the per-image gestalt samples of each point by averaging them.
    fused_gestalt = []
    for point_gestalt_list in colmap_pcloud['gestalt']:
        if len(point_gestalt_list) == 0:
            fused_gestalt.append(np.array([0, 0, 0]))
        elif len(point_gestalt_list) == 1:
            fused_gestalt.append(point_gestalt_list[0])
        else:
            gestalt_values = np.array(point_gestalt_list)
            fused_gestalt.append(np.mean(gestalt_values, axis=0).astype(np.uint8))
    # reshape keeps the (N, 3) layout even when the cloud is empty
    gestalt = np.array(fused_gestalt, dtype=float).reshape(-1, 3)
    gestalt = (gestalt / 255) * 2 - 1  # 0..255 -> -1..1

    # Combined 10D cloud: xyz + colour + ade + gestalt
    colmap_points_10d = np.zeros((len(colmap_points_3d), 10))
    colmap_points_10d[:, :3] = colmap_points_3d
    colmap_points_10d[:, 3:6] = colors
    colmap_points_10d[:, 6] = ade
    colmap_points_10d[:, 7:10] = gestalt

    forward_patches = []
    for i in range(len(vertices)):
        for j in range(i + 1, len(vertices)):
            start_vertex = vertices[i]
            end_vertex = vertices[j]

            line_vector = end_vertex - start_vertex
            line_length = np.linalg.norm(line_vector)
            if not line_length > 0.0:
                # Coincident (or non-finite) vertices define no direction.
                continue
            line_direction = line_vector / line_length

            # Extend the segment on both ends for extra context.
            extended_start = start_vertex - extension_length * line_direction
            extended_line_length = line_length + 2 * extension_length

            # Signed distance of every point along the extended segment.
            start_to_points = colmap_points_3d - extended_start[np.newaxis, :]
            projection_lengths = np.dot(start_to_points, line_direction)
            within_bounds = (projection_lengths >= 0) & (projection_lengths <= extended_line_length)

            # Perpendicular distance of every point to the line.
            closest_points_on_line = (extended_start[np.newaxis, :]
                                      + projection_lengths[:, np.newaxis] * line_direction[np.newaxis, :])
            perpendicular_distances = np.linalg.norm(colmap_points_3d - closest_points_on_line, axis=1)

            within_cylinder = within_bounds & (perpendicular_distances <= cylinder_radius)
            if np.sum(within_cylinder) <= min_points:
                continue

            point_indices_in_cylinder = np.where(within_cylinder)[0]

            # Centre the patch on the midpoint of the original (not extended) segment.
            line_midpoint = (start_vertex + end_vertex) / 2
            points_centered = colmap_points_10d[within_cylinder].copy()
            points_centered[:, :3] -= line_midpoint

            forward_patches.append({
                'patch_10d': points_centered,
                'connection': (i, j),
                'line_start': start_vertex - line_midpoint,
                'line_end': end_vertex - line_midpoint,
                'cylinder_radius': cylinder_radius,
                'point_indices': point_indices_in_cylinder,
                'center': line_midpoint
            })
    return forward_patches
 def calculate_cylinder_overlap_volume(cyl1, cyl2):
     """
     Calculate the intersection volume between two cylinders using numpy vectorization.
     return max(0.0, overlap_volume)
 def create_pcloud(colmap_rec, frame):
     """Collect per-point data for the COLMAP points seen by the frame's images.

     Every frame image whose id matches the name of a COLMAP image is
     processed: the 3D points that image reports via ``has_point3D`` are
     projected with the frame's K, R, t, and the house mask and gestalt
     segmentation are sampled at the rounded pixel position.

     Args:
         colmap_rec: Reconstruction exposing ``images`` and ``points3D``
             mappings (image objects provide ``name`` and ``has_point3D``;
             points provide ``xyz`` and ``color``).
         frame: Dict with parallel sequences under 'K', 'R', 't',
             'image_ids', 'ade', 'gestalt' and 'depth'.

     Returns:
         Dict with keys 'points_7d' (N x 7: xyz, colour / 255, point id),
         'imgs' (per point: ids of the images that see it), 'uv' (per point:
         rounded (u, v) per image), 'all_imgs_ids', 'all_imgs_K',
         'all_imgs_R', 'all_imgs_t', 'ade' (per point boolean) and 'gestalt'
         (per point: list of sampled values). Points are ordered by
         ascending point id.
     """
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

     # 1. Per-image data for frame images that also exist in COLMAP (CPU).
     img_id_to_colmap_img_obj_map = {
         img_obj.name: img_obj for img_obj_name, img_obj in colmap_rec.images.items()
     }

     frame_img_data = {}
     ordered_frame_img_ids = []
     for K_val, R_val, t_val, img_id_val, ade_val, gestalt_val, depth_val in zip(
         frame['K'], frame['R'], frame['t'], frame['image_ids'],
         frame['ade'], frame['gestalt'], frame['depth']
     ):
         if img_id_val not in img_id_to_colmap_img_obj_map:
             continue
         ordered_frame_img_ids.append(img_id_val)

         depth_np = np.array(depth_val)
         depth_H, depth_W = depth_np.shape[0], depth_np.shape[1]

         # Coerce to ndarray so torch.from_numpy below always gets one.
         ade_mask_np = np.asarray(get_house_mask(ade_val))
         # The gestalt image is resized to the depth resolution before sampling.
         gest_seg_pil = gestalt_val.resize((depth_W, depth_H), Image.Resampling.NEAREST)
         gest_seg_np = np.array(gest_seg_pil).astype(np.uint8)

         frame_img_data[img_id_val] = {
             'K_np': np.array(K_val),
             'R_np': np.array(R_val),
             't_np': np.array(t_val).reshape(3, 1),
             'ade_mask_np': ade_mask_np,
             'gestalt_seg_np': gest_seg_np,
             'H': depth_H,
             'W': depth_W
         }

     # 2. Project the points image by image and accumulate per point id.
     point_data_accumulator = {}
     colmap_points_data_cpu = {
         pid: {'xyz': p3D.xyz, 'color': p3D.color / 255.0}
         for pid, p3D in colmap_rec.points3D.items()
     }

     for img_id in ordered_frame_img_ids:
         col_img_obj = img_id_to_colmap_img_obj_map[img_id]
         img_data = frame_img_data[img_id]
         H, W = img_data['H'], img_data['W']
         ade_H, ade_W = img_data['ade_mask_np'].shape[0], img_data['ade_mask_np'].shape[1]

         K_gpu = torch.from_numpy(img_data['K_np']).float().to(device)
         R_gpu = torch.from_numpy(img_data['R_np']).float().to(device)
         t_gpu = torch.from_numpy(img_data['t_np']).float().to(device)
         ade_mask_gpu = torch.from_numpy(img_data['ade_mask_np']).bool().to(device)
         gestalt_seg_gpu = torch.from_numpy(img_data['gestalt_seg_np']).to(device)

         # Visibility is queried point by point on the CPU.
         visible_pids_in_img = []
         visible_xyz_coords_list = []
         for pid, p3D_data in colmap_points_data_cpu.items():
             if col_img_obj.has_point3D(pid):
                 visible_pids_in_img.append(pid)
                 visible_xyz_coords_list.append(p3D_data['xyz'])
         if not visible_pids_in_img:
             continue

         num_visible_points = len(visible_pids_in_img)
         world_pts_gpu = torch.from_numpy(np.array(visible_xyz_coords_list)).float().to(device)

         # Batch projection: [R | t] @ homogeneous world points, then K.
         world_pts_h_gpu = torch.cat(
             (world_pts_gpu, torch.ones(num_visible_points, 1, device=device)), dim=1)
         cam_coords_gpu = torch.hstack((R_gpu, t_gpu)) @ world_pts_h_gpu.T
         cam_z_gpu = cam_coords_gpu[2, :]
         in_front_mask_gpu = cam_z_gpu > 1e-6
         pixel_coords_h_gpu = K_gpu @ cam_coords_gpu

         # Points that are not in front of the camera keep the -1 placeholder.
         u_proj_gpu = torch.full_like(cam_z_gpu, -1.0, dtype=torch.float32)
         v_proj_gpu = torch.full_like(cam_z_gpu, -1.0, dtype=torch.float32)
         u_proj_gpu[in_front_mask_gpu] = pixel_coords_h_gpu[0, in_front_mask_gpu] / cam_z_gpu[in_front_mask_gpu]
         v_proj_gpu[in_front_mask_gpu] = pixel_coords_h_gpu[1, in_front_mask_gpu] / cam_z_gpu[in_front_mask_gpu]

         u_rounded_gpu = torch.round(u_proj_gpu).long()
         v_rounded_gpu = torch.round(v_proj_gpu).long()

         is_in_bounds_gpu = (u_rounded_gpu >= 0) & (u_rounded_gpu < W) & \
                            (v_rounded_gpu >= 0) & (v_rounded_gpu < H) & \
                            in_front_mask_gpu
         # The house mask is not resized to the depth resolution here, so it
         # gets its own bounds check; points outside it keep the False default.
         ade_sample_mask_gpu = is_in_bounds_gpu & (u_rounded_gpu < ade_W) & (v_rounded_gpu < ade_H)

         sampled_ade_status_gpu = torch.zeros(num_visible_points, dtype=torch.bool, device=device)
         sampled_gestalt_values_gpu = torch.zeros(num_visible_points, 3, dtype=torch.uint8, device=device)
         if torch.any(ade_sample_mask_gpu):
             sampled_ade_status_gpu[ade_sample_mask_gpu] = ade_mask_gpu[
                 v_rounded_gpu[ade_sample_mask_gpu], u_rounded_gpu[ade_sample_mask_gpu]]
         if torch.any(is_in_bounds_gpu):
             sampled_gestalt_values_gpu[is_in_bounds_gpu] = gestalt_seg_gpu[
                 v_rounded_gpu[is_in_bounds_gpu], u_rounded_gpu[is_in_bounds_gpu]]

         # Back to the CPU for the per-point accumulation.
         u_rounded_cpu = u_rounded_gpu.cpu().numpy()
         v_rounded_cpu = v_rounded_gpu.cpu().numpy()
         is_in_bounds_cpu = is_in_bounds_gpu.cpu().numpy()
         sampled_ade_status_cpu = sampled_ade_status_gpu.cpu().numpy()
         sampled_gestalt_values_cpu = sampled_gestalt_values_gpu.cpu().numpy()

         for i in range(num_visible_points):
             pid = visible_pids_in_img[i]
             if pid not in point_data_accumulator:
                 point_data_accumulator[pid] = {
                     'xyz': colmap_points_data_cpu[pid]['xyz'],
                     'color': colmap_points_data_cpu[pid]['color'],
                     'imgs_seen_by': [],
                     'uv_projections': [],
                     'ade_status': False,
                     'gestalt_values': []
                 }
             acc = point_data_accumulator[pid]
             acc['imgs_seen_by'].append(img_id)
             acc['uv_projections'].append((u_rounded_cpu[i], v_rounded_cpu[i]))
             if is_in_bounds_cpu[i]:
                 # The last image in which the point lands in bounds decides the flag.
                 acc['ade_status'] = sampled_ade_status_cpu[i]
                 acc['gestalt_values'].append(sampled_gestalt_values_cpu[i])
             else:
                 acc['gestalt_values'].append(np.array([0, 0, 0], dtype=np.uint8))

     # 3. Final assembly (CPU), sorted by point id for a reproducible order.
     points_xyz_world_list = []
     points_colors_list = []
     points_idxs_list = []
     points_imgs_seen_by_list = []
     points_uv_projections_per_point_list = []
     points_ade_status_final_list = []
     points_gestalt_values_per_point_list = []
     for pid in sorted(point_data_accumulator.keys()):
         data = point_data_accumulator[pid]
         points_xyz_world_list.append(data['xyz'])
         points_colors_list.append(data['color'])
         points_idxs_list.append(pid)
         points_imgs_seen_by_list.append(data['imgs_seen_by'])
         points_uv_projections_per_point_list.append(data['uv_projections'])
         points_ade_status_final_list.append(data['ade_status'])
         points_gestalt_values_per_point_list.append(data['gestalt_values'])

     points_xyz_world = np.array(points_xyz_world_list) if points_xyz_world_list else np.empty((0, 3))
     points_colors = np.array(points_colors_list) if points_colors_list else np.empty((0, 3))
     points_idxs = np.array(points_idxs_list, dtype=int) if points_idxs_list else np.empty((0,), dtype=int)
     points_ade = np.array(points_ade_status_final_list, dtype=bool) if points_ade_status_final_list else np.empty((0,), dtype=bool)

     output_all_colmap_img_ids = [img_obj.name for img_obj_name, img_obj in colmap_rec.images.items()]
     output_frame_K, output_frame_R, output_frame_t = [], [], []
     for img_id_val in frame['image_ids']:
         if img_id_val in frame_img_data:
             data = frame_img_data[img_id_val]
             output_frame_K.append(data['K_np'])
             output_frame_R.append(data['R_np'])
             output_frame_t.append(data['t_np'])

     if points_xyz_world.shape[0] > 0:
         colmap_points_7d = np.zeros((points_xyz_world.shape[0], 7))
         colmap_points_7d[:, :3] = points_xyz_world
         colmap_points_7d[:, 3:6] = points_colors
         colmap_points_7d[:, 6] = points_idxs
         whole_pcloud = {
             'points_7d': colmap_points_7d,
             'imgs': points_imgs_seen_by_list,
             'uv': points_uv_projections_per_point_list,
             'all_imgs_ids': output_all_colmap_img_ids,
             'all_imgs_K': output_frame_K,
             'all_imgs_R': output_frame_R,
             'all_imgs_t': output_frame_t,
             'ade': points_ade,
             'gestalt': points_gestalt_values_per_point_list
         }
     else:
         whole_pcloud = {
             'points_7d': np.empty((0, 7)),
             'imgs': [],
             'uv': [],
             'all_imgs_ids': output_all_colmap_img_ids,
             'all_imgs_K': output_frame_K,
             'all_imgs_R': output_frame_R,
             'all_imgs_t': output_frame_t,
             'ade': np.empty((0,), dtype=bool),
             'gestalt': []
         }
     return whole_pcloud
 def predict_wireframe(entry, pnet_model, voxel_model, pnet_class_model, config) -> Tuple[np.ndarray, List[int]]:
     """
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # Delete specified keys from the entry
+    #keys_to_delete = ['wf_vertices', 'wf_edges', 'wf_classifications']
+    #for key in keys_to_delete:
+    #    if key in entry:
+    #        del entry[key]
     good_entry = convert_entry_to_human_readable(entry)
     colmap_rec = good_entry['colmap_binary']
+    #start_time = time.time()
+    #colmap_pcloud = create_pcloud(colmap_rec, good_entry)
+    #end_time = time.time()
+    #print(f"create_pcloud took {end_time - start_time:.4f} seconds")
     vertex_threshold = config.get('vertex_threshold', 0.5)
     edge_threshold = config.get('edge_threshold', 0.5)
     idxs_points = []
     all_connections = []
     for i, (gest, depth, K, R, t, img_id, ade_seg) in enumerate(zip(good_entry['gestalt'],
                                                 good_entry['depth'],
                                                 good_entry['K'],
                                                 good_entry['image_ids'],
                                                 good_entry['ade'] # Added ade20k segmentation
                                                 )):
         # Visualize gestalt segmentation
         K = np.array(K)
         R = np.array(R)
         gest_seg_np = np.array(gest_seg).astype(np.uint8)
         vertices_ours, connections_ours, vertices_3d_ours, patches, filtered_point_idxs = our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id, ade_seg, depth, K=K, R=R, t=t, frame=good_entry)
         idxs_points.append(filtered_point_idxs)
         all_connections.append(connections_ours)
         vertices, connections, vertices_3d = vertices_ours, connections_ours, vertices_3d_ours
         vert_edge_per_image[i] = vertices, connections, vertices_3d
     extracted_points, extracted_colors, extracted_ids, whole_pcloud, connections = extract_vertices_from_whole_pcloud(colmap_rec, idxs_points, all_connections)
+    wf_vertices = good_entry.get('wf_vertices', None)
+    patches = generate_patches_v2(extracted_points, extracted_colors, extracted_ids, whole_pcloud, wf_vertices)
     predicted_vertices = []
+    for i, patch in enumerate(patches):
         pred_vertex, pred_dist, pred_class = predict_vertex_from_patch(pnet_model, patch, device=device)
         if pred_class > vertex_threshold:
             predicted_vertices.append(pred_vertex)
         else:
             predicted_vertices.append(np.array([0.0, 0.0, 0.0]))  # Append a zero vertex if not predicted
     predicted_vertices = np.array(predicted_vertices) if predicted_vertices else np.empty((0, 3))
     if GENERATE_DATASET:
         save_patches_dataset(patches, DATASET_DIR, img_id)
         return empty_solution()
     # Filter out zero vertices and update connections accordingly
     non_zero_mask = ~np.all(np.isclose(predicted_vertices, [0.0, 0.0, 0.0]), axis=1)
     valid_indices = np.where(non_zero_mask)[0]
     # Filter vertices to only include non-zero ones
     filtered_vertices = predicted_vertices[valid_indices]
     if GENERATE_DATASET_EDGES:
         patches = generate_edge_patches(good_entry, filtered_vertices, colmap_pcloud)
         save_patches_dataset_class(patches, EDGES_DATASET_DIR, good_entry['order_id'])
         return empty_solution()
     if len(valid_indices) == 0:
             new_end = old_to_new_mapping[end_idx]
             if new_start != new_end:  # Ensure we don't connect a vertex to itself
                 filtered_connections.append((new_start, new_end))
+    # Generate forward edge patches
+    #forward_patches = generate_edge_patches_forward_10d(good_entry, filtered_vertices, colmap_pcloud)
     forward_patches = generate_edge_patches_forward(good_entry, filtered_vertices)
     new_connections = []
     if len(forward_patches) > 0:
+        for i, patch in enumerate(forward_patches):
             start_idx, end_idx = patch['connection']
             pred_class, pred_score = predict_class_from_patch(pnet_class_model, patch, device=device)
             if pred_score > edge_threshold:
                 new_connections.append((start_idx, end_idx))

train.py CHANGED Viewed

@@ -17,11 +17,13 @@ from tqdm import tqdm
 from fast_pointnet import load_pointnet_model
 from fast_voxel import load_3dcnn_model
 from fast_pointnet_class import load_pointnet_model as load_pointnet_class_model
 import torch
-#ds = load_dataset("usm3d/hoho25k", cache_dir="/media/skvrnjan/sd/hoho25k/", trust_remote_code=True)
-ds = load_dataset("usm3d/hoho25k", cache_dir="/mnt/personal/skvrnjan/hoho25k/", trust_remote_code=True)
-ds = ds.shuffle()
 scores_hss = []
 scores_f1 = []
@@ -31,12 +33,13 @@ show_visu = False
 device = "cuda" if torch.cuda.is_available() else "cpu"
-#pnet_model = load_pointnet_model(model_path="/home/skvrnjan/personal/hoho_pnet/initial_epoch_100.pth", device=device, predict_score=True)
-pnet_model = load_pointnet_model(model_path="/mnt/personal/skvrnjan/hoho_pnet/initial_epoch_100.pth", device=device, predict_score=True)
 #pnet_model = None
 #pnet_class_model = load_pointnet_class_model(model_path="/home/skvrnjan/personal/hoho_pnet_edges_v2/initial_epoch_100.pth", device=device)
-pnet_class_model = load_pointnet_class_model(model_path="/mnt/personal/skvrnjan/hoho_pnet_edges_v2/initial_epoch_100.pth", device=device)
 #pnet_class_model = None
 #voxel_model = load_3dcnn_model(model_path="/home/skvrnjan/personal/hoho_voxel/initial_epoch_100.pth", device=device, predict_score=True)
@@ -45,13 +48,20 @@ voxel_model = None
 config = {'vertex_threshold': 0.4, 'edge_threshold': 0.6, 'only_predicted_connections': False}
 idx = 0
 for a in tqdm(ds['train'], desc="Processing dataset"):
     #plot_all_modalities(a)
     #pred_vertices, pred_edges = predict_wireframe_old(a)
-    #pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model, config)
     try:
-        pred_vertices, pred_edges = predict_wireframe(a, pnet_model, voxel_model, pnet_class_model, config)
         #pred_vertices, pred_edges = predict_wireframe_old(a)
     except:
         pred_vertices, pred_edges = empty_solution()
@@ -72,8 +82,8 @@ for a in tqdm(ds['train'], desc="Processing dataset"):
         o3d.visualization.draw_geometries(visu_all, window_name=f"3D Reconstruction - HSS: {score.hss:.4f}, F1: {score.f1:.4f}, IoU: {score.iou:.4f}")
     idx += 1
-    #if idx >= 100:  # Limit to first 10 samples for testing
-    #    break
 for i in range(10):
     print("END OF DATASET")

 from fast_pointnet import load_pointnet_model
 from fast_voxel import load_3dcnn_model
 from fast_pointnet_class import load_pointnet_model as load_pointnet_class_model
+from fast_pointnet_class_10d import load_pointnet_model as load_pointnet_class_model_10d
 import torch
+import time
+ds = load_dataset("usm3d/hoho25k", cache_dir="/media/skvrnjan/sd/hoho25k/", trust_remote_code=True)
+#ds = load_dataset("usm3d/hoho25k", cache_dir="/mnt/personal/skvrnjan/hoho25k/", trust_remote_code=True)
+#ds = ds.shuffle()
 scores_hss = []
 scores_f1 = []
 device = "cuda" if torch.cuda.is_available() else "cpu"
+pnet_model = load_pointnet_model(model_path="pnet.pth", device=device, predict_score=True)
+#pnet_model = load_pointnet_model(model_path="/mnt/personal/skvrnjan/hoho_pnet/initial_epoch_100.pth", device=device, predict_score=True)
 #pnet_model = None
 #pnet_class_model = load_pointnet_class_model(model_path="/home/skvrnjan/personal/hoho_pnet_edges_v2/initial_epoch_100.pth", device=device)
+#pnet_class_model = load_pointnet_class_model_10d(model_path="/home/skvrnjan/personal/hoho_pnet_edges_10d/initial_epoch_75.pth", device=device)
+pnet_class_model = load_pointnet_class_model(model_path="pnet_class.pth", device=device)
 #pnet_class_model = None
 #voxel_model = load_3dcnn_model(model_path="/home/skvrnjan/personal/hoho_voxel/initial_epoch_100.pth", device=device, predict_score=True)
 config = {'vertex_threshold': 0.4, 'edge_threshold': 0.6, 'only_predicted_connections': False}
 idx = 0
+prediction_times = []
 for a in tqdm(ds['train'], desc="Processing dataset"):
     #plot_all_modalities(a)
     #pred_vertices, pred_edges = predict_wireframe_old(a)
+    pred_vertices, pred_edges = predict_wireframe(a.copy(), pnet_model, voxel_model, pnet_class_model, config)
     try:
+        start_time = time.time()
+        pred_vertices, pred_edges = predict_wireframe(a.copy(), pnet_model, voxel_model, pnet_class_model, config)
         #pred_vertices, pred_edges = predict_wireframe_old(a)
+        end_time = time.time()
+        prediction_time = end_time - start_time
+        prediction_times.append(prediction_time)
+        mean_time = np.mean(prediction_times)
+        print(f"Prediction time: {prediction_time:.4f} seconds, Mean time: {mean_time:.4f} seconds")
     except:
         pred_vertices, pred_edges = empty_solution()
         o3d.visualization.draw_geometries(visu_all, window_name=f"3D Reconstruction - HSS: {score.hss:.4f}, F1: {score.f1:.4f}, IoU: {score.iou:.4f}")
     idx += 1
+    if idx >= 100:  # Limit to first 100 samples for testing
+        break
 for i in range(10):
     print("END OF DATASET")