import numpy as np

# Floor applied to each normalized probability so that every cell remains
# selectable; sampling without replacement needs at least `budget` non-zero
# probabilities.
_MIN_PROBABILITY = 1e-10


def _sampling_probabilities(values: np.ndarray) -> np.ndarray:
    """Convert raw cell values into a probability vector that sums to 1.

    Parameters:
        values (np.ndarray): Non-empty 1D array of cell values used as weights.

    Returns:
        np.ndarray: float64 probabilities, one per cell, each at least
        approximately ``_MIN_PROBABILITY``.

    Raises:
        ValueError: If the values sum to NaN or infinity.
    """
    weights = values.astype(np.float64)
    total = weights.sum()
    if not np.isfinite(total):
        raise ValueError(
            "crop contains NaN or infinite values; cannot build sampling probabilities"
        )
    if total == 0:
        # No usable weighting information: every cell is equally likely.
        return np.full(weights.size, 1.0 / weights.size)

    probabilities = weights / total
    below_floor = probabilities < _MIN_PROBABILITY
    if below_floor.any():
        # Flooring breaks the sum-to-one invariant, so renormalize afterwards;
        # rng.choice rejects probability vectors that do not sum to 1.
        # Renormalize only when the floor changed something, so inputs that
        # needed no flooring keep the probabilities computed from the values.
        probabilities[below_floor] = _MIN_PROBABILITY
        probabilities /= probabilities.sum()
    return probabilities


def build_glb_pointcloud(crop: np.ndarray, budget: int, rng: np.random.Generator) -> np.ndarray:
    """
    Sample ``budget`` values from ``crop``, weighted by the values themselves.

    The crop is flattened, its values are turned into sampling probabilities
    (floored at a tiny minimum and normalized so they sum to 1), and ``budget``
    cells are drawn with ``rng``. Cells are drawn without replacement unless
    the crop has fewer cells than ``budget``, in which case a warning is
    printed and cells are drawn with replacement.

    Note that the result holds the sampled cell *values*, not coordinates, and
    that no spatial (voxel) deduplication is applied beyond the
    without-replacement draw.

    Parameters:
        crop (np.ndarray): Array of values to sample from; any shape, it is flattened.
        budget (int): Number of points to sample.
        rng (np.random.Generator): Random number generator, for reproducibility.

    Returns:
        np.ndarray: 1D array of length ``budget`` with the sampled values.

    Raises:
        ValueError: If ``crop`` is empty while ``budget`` is non-zero, or if
            ``crop`` contains NaN or infinite values.
    """
    flat_crop = np.asarray(crop).reshape(-1)
    n_cells = flat_crop.size

    if n_cells == 0:
        if budget == 0:
            return flat_crop.copy()
        raise ValueError("cannot sample points from an empty crop")

    probabilities = _sampling_probabilities(flat_crop)

    # Unique draws are impossible when there are fewer cells than requested
    # points, so fall back to sampling with replacement.
    with_replacement = n_cells < budget
    if with_replacement:
        print("Warning: crop size smaller than budget. Resampling with replacement.")

    idx = rng.choice(n_cells, size=budget, replace=with_replacement, p=probabilities)
    return flat_crop[idx]


def process_point_cloud(crop: np.ndarray, budget: int, rng: np.random.Generator) -> np.ndarray:
    """
    Generate a point cloud from ``crop`` by delegating to ``build_glb_pointcloud``.

    This is the workflow-level entry point; it currently adds no processing
    of its own.

    Parameters:
        crop (np.ndarray): Array of values to sample from.
        budget (int): Number of points to select for the final point cloud.
        rng (np.random.Generator): Random number generator.

    Returns:
        np.ndarray: 1D array of length ``budget`` with the sampled values.
    """
    return build_glb_pointcloud(crop, budget, rng)


# Example usage with synthetic data.
if __name__ == "__main__":
    rng = np.random.default_rng()  # Seed this generator for reproducible runs
    crop = rng.random((100, 100))  # Example data array, replace with actual data
    budget = 1000  # Number of points to sample

    point_cloud = process_point_cloud(crop, budget, rng)

    # Print the selected points for verification
    print("Selected points:", point_cloud)