qbhf2's picture
added NvidiaWarp and GarmentCode repos
66c9c8a
raw
history blame
7.78 kB
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
#include "warp.h"
#include "cuda_util.h"
#include "hashgrid.h"
#include "sort.h"
#include "string.h"
using namespace wp;
#include <map>
namespace
{
// host-side copy of hash grid descriptors, maps the GPU hash grid address (id)
// to a CPU-side HashGrid descriptor; entries are added / looked up / removed
// through the hash_grid_*_descriptor() helpers below
std::map<uint64_t, HashGrid> g_hash_grid_descriptors;
} // anonymous namespace
namespace wp
{
bool hash_grid_get_descriptor(uint64_t id, HashGrid& grid)
{
const auto& iter = g_hash_grid_descriptors.find(id);
if (iter == g_hash_grid_descriptors.end())
return false;
else
grid = iter->second;
return true;
}
void hash_grid_add_descriptor(uint64_t id, const HashGrid& grid)
{
g_hash_grid_descriptors[id] = grid;
}
void hash_grid_rem_descriptor(uint64_t id)
{
g_hash_grid_descriptors.erase(id);
}
// implemented in hashgrid.cu
void hash_grid_rebuild_device(const HashGrid& grid, const wp::vec3* points, int num_points);
} // namespace wp
// host methods
uint64_t hash_grid_create_host(int dim_x, int dim_y, int dim_z)
{
HashGrid* grid = new HashGrid();
memset(grid, 0, sizeof(HashGrid));
grid->dim_x = dim_x;
grid->dim_y = dim_y;
grid->dim_z = dim_z;
const int num_cells = dim_x*dim_y*dim_z;
grid->cell_starts = (int*)alloc_host(num_cells*sizeof(int));
grid->cell_ends = (int*)alloc_host(num_cells*sizeof(int));
return (uint64_t)(grid);
}
// Releases a host hash grid previously returned by hash_grid_create_host().
// `id` is the HashGrid address returned at creation time. A zero id is
// ignored, so destroying a grid that was never created is a harmless no-op
// (the device destroy path likewise ignores ids it does not know about).
void hash_grid_destroy_host(uint64_t id)
{
    HashGrid* grid = (HashGrid*)(id);

    // nothing to release for a null handle; avoids dereferencing address 0
    if (!grid)
        return;

    // point buffers may still be null if no points were ever reserved;
    // NOTE(review): assumes free_host() accepts null like free() does - the
    // original code already relied on this
    free_host(grid->point_ids);
    free_host(grid->point_cells);
    free_host(grid->cell_starts);
    free_host(grid->cell_ends);

    delete grid;
}
// Ensures the host grid `id` has point storage for at least `num_points`
// points and records `num_points` as the current point count.
// When the current capacity (max_points) is too small, the point buffers are
// freed and re-allocated with 1.5x headroom; previous buffer contents are
// discarded, not copied.
void hash_grid_reserve_host(uint64_t id, int num_points)
{
    HashGrid* grid = (HashGrid*)(id);

    if (num_points > grid->max_points)
    {
        free_host(grid->point_cells);
        free_host(grid->point_ids);

        // grow to 1.5x the request; written as n + n/2, which equals n*3/2
        // for the positive counts that reach this branch but does not
        // overflow int in the intermediate n*3 product
        const int num_to_alloc = num_points + num_points/2;

        // *2 for auxiliary radix buffers; the byte count is computed in
        // size_t so 2*num_to_alloc cannot overflow int before widening
        const size_t num_bytes = size_t(2)*size_t(num_to_alloc)*sizeof(int);

        grid->point_cells = (int*)alloc_host(num_bytes);
        grid->point_ids = (int*)alloc_host(num_bytes);
        grid->max_points = num_to_alloc;
    }

    grid->num_points = num_points;
}
// Rebuilds the host grid `id` from `num_points` positions in `points` using
// cells of width `cell_width`.
// Steps: reserve point storage, compute a cell index per point, sort the
// (cell, point id) pairs by cell, then record for every occupied cell the
// [start, end) range it occupies in the sorted arrays. Cells without points
// are left with start == end == 0.
void hash_grid_update_host(uint64_t id, float cell_width, const wp::vec3* points, int num_points)
{
    HashGrid* grid = (HashGrid*)(id);

    // may re-allocate point_cells / point_ids, so do this before touching them
    hash_grid_reserve_host(id, num_points);

    grid->cell_width = cell_width;
    grid->cell_width_inv = 1.0f / cell_width;

    // assign every point to its cell and remember the original point index
    for (int p = 0; p < num_points; ++p)
    {
        grid->point_cells[p] = hash_grid_index(*grid, points[p]);
        grid->point_ids[p] = p;
    }

    // order the pairs by cell index
    radix_sort_pairs_host(grid->point_cells, grid->point_ids, num_points);

    // reset all cell ranges to empty
    const int num_cells = grid->dim_x * grid->dim_y * grid->dim_z;
    const size_t cell_bytes = sizeof(int) * num_cells;
    memset(grid->cell_starts, 0, cell_bytes);
    memset(grid->cell_ends, 0, cell_bytes);

    // walk the sorted cell array one run of equal cell indices at a time
    // and store each run's [begin, end) range for its cell
    const int* sorted_cells = grid->point_cells;

    int run_begin = 0;
    while (run_begin < num_points)
    {
        const int cell = sorted_cells[run_begin];

        int run_end = run_begin + 1;
        while (run_end < num_points && sorted_cells[run_end] == cell)
            ++run_end;

        grid->cell_starts[cell] = run_begin;
        grid->cell_ends[cell] = run_end;

        run_begin = run_end;
    }
}
// device methods
uint64_t hash_grid_create_device(void* context, int dim_x, int dim_y, int dim_z)
{
ContextGuard guard(context);
HashGrid grid;
memset(&grid, 0, sizeof(HashGrid));
grid.context = context ? context : cuda_context_get_current();
grid.dim_x = dim_x;
grid.dim_y = dim_y;
grid.dim_z = dim_z;
const int num_cells = dim_x*dim_y*dim_z;
grid.cell_starts = (int*)alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));
grid.cell_ends = (int*)alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));
// upload to device
HashGrid* grid_device = (HashGrid*)(alloc_device(WP_CURRENT_CONTEXT, sizeof(HashGrid)));
memcpy_h2d(WP_CURRENT_CONTEXT, grid_device, &grid, sizeof(HashGrid));
uint64_t grid_id = (uint64_t)(grid_device);
hash_grid_add_descriptor(grid_id, grid);
return grid_id;
}
void hash_grid_destroy_device(uint64_t id)
{
HashGrid grid;
if (hash_grid_get_descriptor(id, grid))
{
ContextGuard guard(grid.context);
free_device(WP_CURRENT_CONTEXT, grid.point_ids);
free_device(WP_CURRENT_CONTEXT, grid.point_cells);
free_device(WP_CURRENT_CONTEXT, grid.cell_starts);
free_device(WP_CURRENT_CONTEXT, grid.cell_ends);
free_device(WP_CURRENT_CONTEXT, (HashGrid*)id);
hash_grid_rem_descriptor(id);
}
}
// Ensures the device grid `id` has point storage for at least `num_points`
// points. Ids without a registered host-side descriptor are ignored.
// When the current capacity (max_points) is too small, the point buffers are
// freed and re-allocated with 1.5x headroom (previous contents are
// discarded), the radix sort is pre-sized to match, and both the device-side
// and host-side descriptors are updated with the new buffers.
// Unlike the host variant this does not modify num_points.
void hash_grid_reserve_device(uint64_t id, int num_points)
{
    HashGrid grid;

    if (hash_grid_get_descriptor(id, grid))
    {
        if (num_points > grid.max_points)
        {
            ContextGuard guard(grid.context);

            free_device(WP_CURRENT_CONTEXT, grid.point_cells);
            free_device(WP_CURRENT_CONTEXT, grid.point_ids);

            // grow to 1.5x the request; written as n + n/2, which equals
            // n*3/2 for the positive counts that reach this branch but does
            // not overflow int in the intermediate n*3 product
            const int num_to_alloc = num_points + num_points/2;

            // *2 for auxiliary radix buffers; the byte count is computed in
            // size_t so 2*num_to_alloc cannot overflow int before widening
            const size_t num_bytes = size_t(2)*size_t(num_to_alloc)*sizeof(int);

            grid.point_cells = (int*)alloc_device(WP_CURRENT_CONTEXT, num_bytes);
            grid.point_ids = (int*)alloc_device(WP_CURRENT_CONTEXT, num_bytes);
            grid.max_points = num_to_alloc;

            // ensure we pre-size our sort routine to avoid
            // allocations during graph capture
            radix_sort_reserve(WP_CURRENT_CONTEXT, num_to_alloc);

            // update device side grid descriptor, todo: this is
            // slightly redundant since it is performed again
            // inside hash_grid_update_device(), but since
            // reserve can be called from Python we need to make
            // sure it is consistent
            memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &grid, sizeof(HashGrid));

            // update host side grid descriptor
            hash_grid_add_descriptor(id, grid);
        }
    }
}
// Rebuilds the device grid `id` from `num_points` positions in `points`
// using cells of width `cell_width`.
// Reserves point storage, updates the descriptor's point count and cell
// width, runs the device rebuild, then writes the descriptor back to both
// the device-side copy and the host-side table.
// Ids without a registered host-side descriptor are ignored (after the
// reserve call, which ignores them as well).
void hash_grid_update_device(uint64_t id, float cell_width, const wp::vec3* points, int num_points)
{
    // ensure we have enough memory reserved for update
    // this must be done before retrieving the descriptor
    // below since it may update it
    hash_grid_reserve_device(id, num_points);

    // host grid must be static so that we can
    // perform host->device memcpy from this variable
    // and have it safely recorded inside CUDA graphs
    // NOTE(review): being a function-local static, this one descriptor is
    // shared by every call for every grid - confirm callers never run this
    // concurrently from multiple threads
    static HashGrid grid;

    if (!hash_grid_get_descriptor(id, grid))
        return;

    ContextGuard guard(grid.context);

    grid.num_points = num_points;
    grid.cell_width = cell_width;
    grid.cell_width_inv = 1.0f / cell_width;

    hash_grid_rebuild_device(grid, points, num_points);

    // update device side grid descriptor (the id is its device address)
    HashGrid* device_desc = (HashGrid*)id;
    memcpy_h2d(WP_CURRENT_CONTEXT, device_desc, &grid, sizeof(HashGrid));

    // update host side grid descriptor
    hash_grid_add_descriptor(id, grid);
}
#if !WP_ENABLE_CUDA
namespace wp
{

// Stand-in for the device rebuild, compiled only when WP_ENABLE_CUDA is
// zero or undefined: it provides a definition for the symbol that
// hash_grid_update_device() calls and intentionally does nothing.
void hash_grid_rebuild_device(const HashGrid& /*grid*/, const wp::vec3* /*points*/, int /*num_points*/)
{
}

} // namespace wp
#endif // !WP_ENABLE_CUDA