"""Batch generate pycut superpoint.npy for all S3DIS rooms (multiprocessing)."""
import os
import sys
import time

import numpy as np
from scipy.spatial import cKDTree
from multiprocessing import Pool, cpu_count

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LIBCP_DIR = os.path.join(SCRIPT_DIR, "_cut_pursuit", "build", "src")
# The compiled cut-pursuit extension and the local helper module are located
# relative to this script, so both directories must be importable first.
sys.path.insert(0, LIBCP_DIR)
sys.path.insert(0, SCRIPT_DIR)

import libcp
from lib_geo import (
    _normalize_xyz_np,
    _normalize_normals_np,
    _local_geom_features_chunked_np,
    _build_adj_graph_np,
    _edge_weights_chunked_np,
    _relabel_contiguous_np,
)

S3DIS_ROOT = "/mnt/data/AODUOLI/_work_biptv3/pointcept_framework/data/s3dis_official"
OUT_ROOT = os.path.join(SCRIPT_DIR, "outputs", "superpoint_pycut_all")


def merge_small_components(xyz, labels, min_size=50):
    """Absorb undersized components into their nearest large neighbour.

    Every point whose component has fewer than ``min_size`` points takes the
    label of the spatially nearest point (``cKDTree`` 1-NN query on ``xyz``)
    that belongs to a component with at least ``min_size`` points.

    Args:
        xyz: Per-point coordinates, indexed in parallel with ``labels``.
        labels: Non-negative integer component id per point (a requirement
            of ``np.bincount``).
        min_size: Minimum number of points a component needs to be kept.

    Returns:
        ``labels`` itself when no component is undersized, an unmodified copy
        when *every* component is undersized (there is nothing to merge
        into), otherwise the merged labels passed through
        ``_relabel_contiguous_np``.
    """
    counts = np.bincount(labels)
    small_mask = counts[labels] < min_size
    if not small_mask.any():
        return labels

    labels = labels.copy()
    large_mask = ~small_mask
    if not large_mask.any():
        return labels

    # Only points of large components are candidate merge targets.
    tree = cKDTree(xyz[large_mask])
    _, nn_idx = tree.query(xyz[small_mask], k=1)
    # nn_idx indexes the filtered array; map it back to full-array indices.
    large_indices = np.where(large_mask)[0]
    labels[small_mask] = labels[large_indices[nn_idx]]
    return _relabel_contiguous_np(labels)


def generate_superpoints_pycut(
    xyz,
    normals=None,
    k_feat=10,
    k_adj=10,
    chunk_size=8192,
    normal_scale=0.25,
    lam=0.03,
    sigma=0.5,
    min_comp_weight=20,
    weight_decay=0.7,
    merge_min_size=50,
):
    """Partition a point cloud into superpoints with ``libcp.cutpursuit``.

    Builds a per-point feature matrix and an adjacency graph with the
    ``lib_geo`` helpers, runs cut pursuit on it, and optionally merges
    undersized components.

    Args:
        xyz: Per-point coordinates (presumably ``(N, 3)`` -- confirm against
            the ``lib_geo`` helpers).
        normals: Optional per-point normals. When given they are normalised,
            scaled by ``normal_scale`` and appended to the feature columns.
        k_feat: Forwarded to ``_local_geom_features_chunked_np``.
        k_adj: Forwarded to ``_build_adj_graph_np``.
        chunk_size: Forwarded to ``_local_geom_features_chunked_np``.
        normal_scale: Multiplier applied to the normalised normals.
        lam: Passed to ``libcp.cutpursuit`` (presumably the regularisation
            strength -- confirm against the binding).
        sigma: Forwarded to ``_edge_weights_chunked_np``.
        min_comp_weight: Passed to ``libcp.cutpursuit`` as an int.
        weight_decay: Passed to ``libcp.cutpursuit`` as a float.
        merge_min_size: When > 0, components with fewer points are merged via
            ``merge_small_components``.

    Returns:
        Integer label per point, as produced by ``_relabel_contiguous_np``.
    """
    xyz_norm = _normalize_xyz_np(xyz)
    geom_feat = _local_geom_features_chunked_np(
        xyz_norm, k_feat=k_feat, chunk_size=chunk_size
    )

    feat_parts = [geom_feat]
    if normals is not None:
        nn = _normalize_normals_np(normals)
        feat_parts.append(nn * normal_scale)
    Y = np.hstack(feat_parts).astype(np.float32)

    src, dst = _build_adj_graph_np(xyz_norm, k_adj=k_adj, mutual=False, undirected=True)
    # NOTE(review): the edge-weight helper is called with a fixed lam=1.0 and
    # the transposed features; the ``lam`` argument only reaches cutpursuit.
    ew = _edge_weights_chunked_np(Y.T, src, dst, lam=1.0, sigma=sigma)

    # NOTE(review): the literal 0 is an opaque positional argument of the
    # binding -- its meaning is not visible from this file.
    components, in_component = libcp.cutpursuit(
        Y,
        src.astype(np.uint32),
        dst.astype(np.uint32),
        ew.astype(np.float32),
        float(lam),
        int(min_comp_weight),
        0,
        float(weight_decay),
    )

    labels = _relabel_contiguous_np(np.asarray(in_component, dtype=np.int32))
    if merge_min_size > 0:
        labels = merge_small_components(xyz, labels, min_size=merge_min_size)
    return labels


def process_room(args):
    """Generate and save the superpoint labels for one room.

    Args:
        args: ``(area, room)`` directory names below ``S3DIS_ROOT``.

    Returns:
        ``(area, room, n_points, n_superpoints, seconds, status)``. ``status``
        is ``"done"`` on success, ``"exists"`` when the output is already
        present, ``"no coord.npy"`` when the input is missing, or
        ``"error: <repr>"`` when processing failed. For every status other
        than ``"done"`` the numeric fields are ``-1, 0, 0.0``.
    """
    area, room = args
    room_dir = os.path.join(S3DIS_ROOT, area, room)
    coord_path = os.path.join(room_dir, "coord.npy")
    normal_path = os.path.join(room_dir, "normal.npy")
    if not os.path.exists(coord_path):
        return (area, room, -1, 0, 0.0, "no coord.npy")

    out_dir = os.path.join(OUT_ROOT, area, room)
    out_path = os.path.join(out_dir, "superpoint.npy")
    if os.path.exists(out_path):
        return (area, room, -1, 0, 0.0, "exists")

    # Resuming relies on "output file exists == room finished", so the result
    # is written to a per-process temp file and renamed into place; an
    # interrupted run can then never leave a truncated superpoint.npy behind.
    tmp_path = f"{out_path}.tmp.{os.getpid()}"
    try:
        os.makedirs(out_dir, exist_ok=True)
        coord = np.load(coord_path).astype(np.float32)
        normals = None
        if os.path.exists(normal_path):
            normals = np.load(normal_path).astype(np.float32)

        t0 = time.time()
        labels = generate_superpoints_pycut(
            coord,
            normals=normals,
            lam=0.03,
            sigma=0.5,
            k_feat=10,
            k_adj=10,
            merge_min_size=50,
        )
        dt = time.time() - t0

        # max() raises on an empty array; an empty room has zero superpoints.
        n_sp = int(labels.max()) + 1 if labels.size else 0

        # Saving through a file object stops np.save from appending ".npy"
        # to the temp name.
        with open(tmp_path, "wb") as fh:
            np.save(fh, labels)
        os.replace(tmp_path, out_path)
    except Exception as exc:
        # Worker boundary: report the failure through the status field so a
        # single bad room does not abort the whole pool.
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # Best-effort cleanup; the temp file may never have existed.
        return (area, room, -1, 0, 0.0, f"error: {exc!r}")

    return (area, room, coord.shape[0], n_sp, dt, "done")


def main():
    """Discover every room under ``S3DIS_ROOT`` and process the pending ones."""
    os.makedirs(OUT_ROOT, exist_ok=True)

    areas = sorted(d for d in os.listdir(S3DIS_ROOT) if d.startswith("Area_"))
    tasks = []
    for area in areas:
        area_dir = os.path.join(S3DIS_ROOT, area)
        rooms = sorted(
            r for r in os.listdir(area_dir) if os.path.isdir(os.path.join(area_dir, r))
        )
        tasks.extend((area, room) for room in rooms)

    total = len(tasks)
    # Rooms whose output file already exists are treated as finished.
    pending = [
        (area, room)
        for area, room in tasks
        if not os.path.exists(os.path.join(OUT_ROOT, area, room, "superpoint.npy"))
    ]
    print(f"Total: {total} rooms, already done: {total - len(pending)}, pending: {len(pending)}")

    n_workers = min(12, cpu_count())
    print(f"Using {n_workers} workers")

    t_global = time.time()
    with Pool(n_workers) as pool:
        # Results arrive in completion order, so done_count is a progress
        # counter rather than an index into ``pending``.
        results = pool.imap_unordered(process_room, pending)
        for done_count, result in enumerate(results, start=1):
            area, room, n_pts, n_sp, dt, status = result
            if status == "done":
                print(
                    f"[{done_count}/{len(pending)}] {area}/{room}: {n_pts} pts -> {n_sp} sp ({dt:.1f}s)",
                    flush=True,
                )
            elif status == "exists":
                print(f"[{done_count}/{len(pending)}] {area}/{room}: EXISTS", flush=True)
            else:
                print(f"[{done_count}/{len(pending)}] {area}/{room}: SKIP ({status})", flush=True)

    print(f"\nDone! Total time: {time.time() - t_global:.0f}s")


if __name__ == "__main__":
    main()