/** Copyright (c) 2022 NVIDIA CORPORATION.  All rights reserved.
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto.  Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 */
using namespace wp;

namespace
{

// Host-side copy of hash grid descriptors; maps a GPU grid address (the
// opaque uint64 id handed back to callers) to a CPU-resident HashGrid desc
// so host code can free/resize device buffers without reading device memory.
// NOTE(review): no locking here — presumably callers serialize access from a
// single thread; confirm against the callers before using concurrently.
std::map<uint64_t, HashGrid> g_hash_grid_descriptors;

} // anonymous namespace
| namespace wp | |
| { | |
| bool hash_grid_get_descriptor(uint64_t id, HashGrid& grid) | |
| { | |
| const auto& iter = g_hash_grid_descriptors.find(id); | |
| if (iter == g_hash_grid_descriptors.end()) | |
| return false; | |
| else | |
| grid = iter->second; | |
| return true; | |
| } | |
| void hash_grid_add_descriptor(uint64_t id, const HashGrid& grid) | |
| { | |
| g_hash_grid_descriptors[id] = grid; | |
| } | |
| void hash_grid_rem_descriptor(uint64_t id) | |
| { | |
| g_hash_grid_descriptors.erase(id); | |
| } | |
| // implemented in hashgrid.cu | |
| void hash_grid_rebuild_device(const HashGrid& grid, const wp::vec3* points, int num_points); | |
| } // namespace wp | |
| // host methods | |
| uint64_t hash_grid_create_host(int dim_x, int dim_y, int dim_z) | |
| { | |
| HashGrid* grid = new HashGrid(); | |
| memset(grid, 0, sizeof(HashGrid)); | |
| grid->dim_x = dim_x; | |
| grid->dim_y = dim_y; | |
| grid->dim_z = dim_z; | |
| const int num_cells = dim_x*dim_y*dim_z; | |
| grid->cell_starts = (int*)alloc_host(num_cells*sizeof(int)); | |
| grid->cell_ends = (int*)alloc_host(num_cells*sizeof(int)); | |
| return (uint64_t)(grid); | |
| } | |
| void hash_grid_destroy_host(uint64_t id) | |
| { | |
| HashGrid* grid = (HashGrid*)(id); | |
| free_host(grid->point_ids); | |
| free_host(grid->point_cells); | |
| free_host(grid->cell_starts); | |
| free_host(grid->cell_ends); | |
| delete grid; | |
| } | |
| void hash_grid_reserve_host(uint64_t id, int num_points) | |
| { | |
| HashGrid* grid = (HashGrid*)(id); | |
| if (num_points > grid->max_points) | |
| { | |
| free_host(grid->point_cells); | |
| free_host(grid->point_ids); | |
| const int num_to_alloc = num_points*3/2; | |
| grid->point_cells = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxilliary radix buffers | |
| grid->point_ids = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxilliary radix buffers | |
| grid->max_points = num_to_alloc; | |
| } | |
| grid->num_points = num_points; | |
| } | |
// Rebuild the host grid for a new point set:
//   1. reserve storage and assign each point a cell hash
//   2. sort (cell, point-id) pairs by cell
//   3. scan the sorted cells to fill cell_starts/cell_ends ranges
// After this call, points in cell c occupy point_ids[cell_starts[c] .. cell_ends[c]).
// NOTE(review): assumes cell_width != 0 (divided below) and that `points`
// holds at least num_points entries — confirm at the call sites.
void hash_grid_update_host(uint64_t id, float cell_width, const wp::vec3* points, int num_points)
{
    HashGrid* grid = (HashGrid*)(id);

    // grows point buffers if needed and sets grid->num_points
    hash_grid_reserve_host(id, num_points);

    grid->cell_width = cell_width;
    grid->cell_width_inv = 1.0f / cell_width;

    // calculate cell for each position
    for (int i=0; i < num_points; ++i)
    {
        grid->point_cells[i] = hash_grid_index(*grid, points[i]);
        grid->point_ids[i] = i;
    }

    // sort indices; point_cells/point_ids were allocated with 2x headroom
    // in hash_grid_reserve_host() to provide the sort's auxiliary space
    radix_sort_pairs_host(grid->point_cells, grid->point_ids, num_points);

    // clear all ranges first: cells with no points keep start == end == 0
    const int num_cells = grid->dim_x * grid->dim_y * grid->dim_z;
    memset(grid->cell_starts, 0, sizeof(int) * num_cells);
    memset(grid->cell_ends, 0, sizeof(int) * num_cells);

    // compute cell start / end
    for (int i=0; i < num_points; ++i)
    {
        // scan the particle-cell array to find the start and end
        const int c = grid->point_cells[i];

        if (i == 0)
            grid->cell_starts[c] = 0;
        else
        {
            // a change in cell value closes the previous cell's range
            // and opens the current one (cells are sorted, so each value
            // appears in one contiguous run)
            const int p = grid->point_cells[i-1];
            if (c != p)
            {
                grid->cell_starts[c] = i;
                grid->cell_ends[p] = i;
            }
        }

        // last point closes the final run (end is exclusive)
        if (i == num_points - 1)
        {
            grid->cell_ends[c] = i + 1;
        }
    }
}
| // device methods | |
| uint64_t hash_grid_create_device(void* context, int dim_x, int dim_y, int dim_z) | |
| { | |
| ContextGuard guard(context); | |
| HashGrid grid; | |
| memset(&grid, 0, sizeof(HashGrid)); | |
| grid.context = context ? context : cuda_context_get_current(); | |
| grid.dim_x = dim_x; | |
| grid.dim_y = dim_y; | |
| grid.dim_z = dim_z; | |
| const int num_cells = dim_x*dim_y*dim_z; | |
| grid.cell_starts = (int*)alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int)); | |
| grid.cell_ends = (int*)alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int)); | |
| // upload to device | |
| HashGrid* grid_device = (HashGrid*)(alloc_device(WP_CURRENT_CONTEXT, sizeof(HashGrid))); | |
| memcpy_h2d(WP_CURRENT_CONTEXT, grid_device, &grid, sizeof(HashGrid)); | |
| uint64_t grid_id = (uint64_t)(grid_device); | |
| hash_grid_add_descriptor(grid_id, grid); | |
| return grid_id; | |
| } | |
| void hash_grid_destroy_device(uint64_t id) | |
| { | |
| HashGrid grid; | |
| if (hash_grid_get_descriptor(id, grid)) | |
| { | |
| ContextGuard guard(grid.context); | |
| free_device(WP_CURRENT_CONTEXT, grid.point_ids); | |
| free_device(WP_CURRENT_CONTEXT, grid.point_cells); | |
| free_device(WP_CURRENT_CONTEXT, grid.cell_starts); | |
| free_device(WP_CURRENT_CONTEXT, grid.cell_ends); | |
| free_device(WP_CURRENT_CONTEXT, (HashGrid*)id); | |
| hash_grid_rem_descriptor(id); | |
| } | |
| } | |
| void hash_grid_reserve_device(uint64_t id, int num_points) | |
| { | |
| HashGrid grid; | |
| if (hash_grid_get_descriptor(id, grid)) | |
| { | |
| if (num_points > grid.max_points) | |
| { | |
| ContextGuard guard(grid.context); | |
| free_device(WP_CURRENT_CONTEXT, grid.point_cells); | |
| free_device(WP_CURRENT_CONTEXT, grid.point_ids); | |
| const int num_to_alloc = num_points*3/2; | |
| grid.point_cells = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxilliary radix buffers | |
| grid.point_ids = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxilliary radix buffers | |
| grid.max_points = num_to_alloc; | |
| // ensure we pre-size our sort routine to avoid | |
| // allocations during graph capture | |
| radix_sort_reserve(WP_CURRENT_CONTEXT, num_to_alloc); | |
| // update device side grid descriptor, todo: this is | |
| // slightly redundant since it is performed again | |
| // inside hash_grid_update_device(), but since | |
| // reserve can be called from Python we need to make | |
| // sure it is consistent | |
| memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &grid, sizeof(HashGrid)); | |
| // update host side grid descriptor | |
| hash_grid_add_descriptor(id, grid); | |
| } | |
| } | |
| } | |
// Rebuild the device grid for a new point set: reserve storage, update the
// cell-width parameters, launch the device-side rebuild, then push the
// refreshed descriptor to both the device copy and the host-side mirror.
// NOTE(review): the static descriptor below makes this function non-reentrant
// across threads — presumably calls are serialized by the caller; confirm.
void hash_grid_update_device(uint64_t id, float cell_width, const wp::vec3* points, int num_points)
{
    // ensure we have enough memory reserved for update
    // this must be done before retrieving the descriptor
    // below since it may update it
    hash_grid_reserve_device(id, num_points);

    // host grid must be static so that we can
    // perform host->device memcpy from this variable
    // and have it safely recorded inside CUDA graphs
    static HashGrid grid;

    if (hash_grid_get_descriptor(id, grid))
    {
        ContextGuard guard(grid.context);

        grid.num_points = num_points;
        grid.cell_width = cell_width;
        grid.cell_width_inv = 1.0f / cell_width;   // NOTE(review): assumes cell_width != 0

        hash_grid_rebuild_device(grid, points, num_points);

        // update device side grid descriptor
        memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &grid, sizeof(HashGrid));

        // update host side grid descriptor
        hash_grid_add_descriptor(id, grid);
    }
}
namespace wp
{

// Empty definition of the device rebuild entry point.
// NOTE(review): the forward declaration above says this is "implemented in
// hashgrid.cu" — this body looks like the stub used for CPU-only builds
// (no CUDA), where device grids are never rebuilt. Confirm this file is
// excluded (or the stub is intended) when compiling with CUDA support.
void hash_grid_rebuild_device(const HashGrid& grid, const wp::vec3* points, int num_points)
{
}

} // namespace wp