Spaces:

qbhf2
/

GarmentCode

Sleeping

App Files Files Community

GarmentCode / NvidiaWarp-GarmentCode /warp /native /hashgrid.cpp

qbhf2

added NvidiaWarp and GarmentCode repos

66c9c8a 11 months ago

raw

history blame

7.78 kB

	/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
	* NVIDIA CORPORATION and its licensors retain all intellectual property
	* and proprietary rights in and to this software, related documentation
	* and any modifications thereto. Any use, reproduction, disclosure or
	* distribution of this software and related documentation without an express
	* license agreement from NVIDIA CORPORATION is strictly prohibited.
	*/

	#include "warp.h"
	#include "cuda_util.h"
	#include "hashgrid.h"
	#include "sort.h"
	#include "string.h"

	using namespace wp;

	#include <map>

	namespace
	{
	// Host-side copy of hash grid descriptors: maps the id of a device grid
	// (the address of its device-side HashGrid, cast to uint64_t) to a
	// CPU-resident HashGrid descriptor.
	std::map<uint64_t, HashGrid> g_hash_grid_descriptors;

	} // anonymous namespace


	namespace wp
	{

	// Looks up the host-side descriptor registered under `id`.
	// On a hit the descriptor is copied into `grid` and true is returned;
	// on a miss `grid` is left untouched and false is returned.
	bool hash_grid_get_descriptor(uint64_t id, HashGrid& grid)
	{
	    const auto found = g_hash_grid_descriptors.find(id);
	    if (found != g_hash_grid_descriptors.end())
	    {
	        grid = found->second;
	        return true;
	    }

	    return false;
	}

	// Stores `grid` under `id`, overwriting any descriptor already kept there.
	void hash_grid_add_descriptor(uint64_t id, const HashGrid& grid)
	{
	    g_hash_grid_descriptors[id] = grid;
	}

	// Forgets the descriptor kept under `id`; a missing id is simply ignored.
	void hash_grid_rem_descriptor(uint64_t id)
	{
	    g_hash_grid_descriptors.erase(id);
	}

	// Defined in hashgrid.cu; when WP_ENABLE_CUDA is off an empty stub
	// at the end of this file is used instead.
	void hash_grid_rebuild_device(const HashGrid& grid, const wp::vec3* points, int num_points);

	} // namespace wp


	// host methods
	// Creates a host (CPU) hash grid with dim_x * dim_y * dim_z cells.
	//
	// The HashGrid is heap-allocated and zero-filled, so the point buffers
	// start out null with max_points == 0; they are allocated later by
	// hash_grid_reserve_host(). The per-cell start/end arrays are allocated
	// here, one int per cell, but are not initialized until the grid is updated.
	//
	// Returns the address of the new HashGrid cast to uint64_t; release it
	// with hash_grid_destroy_host().
	uint64_t hash_grid_create_host(int dim_x, int dim_y, int dim_z)
	{
	    HashGrid* grid = new HashGrid();
	    memset(grid, 0, sizeof(HashGrid));

	    grid->dim_x = dim_x;
	    grid->dim_y = dim_y;
	    grid->dim_z = dim_z;

	    // one start and one end index per grid cell
	    const int num_cells = dim_x*dim_y*dim_z;
	    grid->cell_starts = (int*)alloc_host(num_cells*sizeof(int));
	    grid->cell_ends = (int*)alloc_host(num_cells*sizeof(int));

	    return (uint64_t)(grid);
	}

	void hash_grid_destroy_host(uint64_t id)
	{
	HashGrid* grid = (HashGrid*)(id);

	free_host(grid->point_ids);
	free_host(grid->point_cells);
	free_host(grid->cell_starts);
	free_host(grid->cell_ends);

	delete grid;
	}

	// Ensures the host grid identified by `id` can hold `num_points` points.
	//
	// If the request exceeds the current capacity (max_points), the point
	// buffers are freed and reallocated with room for 1.5x the requested
	// count; previous contents are not preserved. num_points is always
	// recorded on the grid, whether or not a reallocation happened.
	void hash_grid_reserve_host(uint64_t id, int num_points)
	{
	    HashGrid* grid = (HashGrid*)(id);

	    if (num_points > grid->max_points)
	    {
	        free_host(grid->point_cells);
	        free_host(grid->point_ids);

	        // over-allocate by 50% so that small growth does not reallocate every time
	        const int num_to_alloc = num_points*3/2;
	        grid->point_cells = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // 2 for auxiliary radix buffers
	        grid->point_ids = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // 2 for auxiliary radix buffers

	        grid->max_points = num_to_alloc;
	    }

	    grid->num_points = num_points;
	}

	// Rebuilds the host grid identified by `id` from `num_points` positions.
	//
	// Steps: make sure the point buffers are large enough, store the cell
	// width and its reciprocal, compute a cell index for every point, sort
	// the (cell, point id) pairs, then record for every occupied cell the
	// half-open range [cell_starts, cell_ends) it covers in the sorted arrays.
	// Cells without points end up with start == end == 0.
	void hash_grid_update_host(uint64_t id, float cell_width, const wp::vec3* points, int num_points)
	{
	    HashGrid* const host_grid = reinterpret_cast<HashGrid*>(id);

	    hash_grid_reserve_host(id, num_points);

	    host_grid->cell_width = cell_width;
	    host_grid->cell_width_inv = 1.0f / cell_width;

	    // assign every point to its cell and remember the original point index
	    for (int pt = 0; pt < num_points; ++pt)
	    {
	        host_grid->point_cells[pt] = hash_grid_index(*host_grid, points[pt]);
	        host_grid->point_ids[pt] = pt;
	    }

	    // order the point ids by the cell they fall into
	    radix_sort_pairs_host(host_grid->point_cells, host_grid->point_ids, num_points);

	    // reset all cell ranges to empty
	    const int cell_count = host_grid->dim_x * host_grid->dim_y * host_grid->dim_z;
	    memset(host_grid->cell_starts, 0, sizeof(int) * cell_count);
	    memset(host_grid->cell_ends, 0, sizeof(int) * cell_count);

	    // walk the sorted cell array; a range boundary sits wherever the cell changes
	    for (int pt = 0; pt < num_points; ++pt)
	    {
	        const int cell = host_grid->point_cells[pt];

	        if (pt > 0)
	        {
	            const int prev_cell = host_grid->point_cells[pt-1];

	            if (prev_cell != cell)
	            {
	                // the current cell opens here and the previous one closes here
	                host_grid->cell_starts[cell] = pt;
	                host_grid->cell_ends[prev_cell] = pt;
	            }
	        }
	        else
	        {
	            // the very first entry always opens its cell
	            host_grid->cell_starts[cell] = 0;
	        }

	        // the very last entry always closes its cell
	        if (pt + 1 == num_points)
	        {
	            host_grid->cell_ends[cell] = pt + 1;
	        }
	    }
	}

	// device methods
	// Creates a device hash grid with dim_x * dim_y * dim_z cells.
	//
	// A zero-filled HashGrid descriptor is built on the host, given the
	// context (the one passed in, or the current one when `context` is null)
	// and the dimensions, and device arrays for the per-cell start/end
	// indices are allocated. The descriptor is then copied into a
	// device-side HashGrid, and a host copy is registered under the same id.
	//
	// Returns the address of the device-side HashGrid cast to uint64_t;
	// release it with hash_grid_destroy_device().
	uint64_t hash_grid_create_device(void* context, int dim_x, int dim_y, int dim_z)
	{
	    ContextGuard guard(context);

	    HashGrid grid;
	    memset(&grid, 0, sizeof(HashGrid));

	    grid.context = context ? context : cuda_context_get_current();

	    grid.dim_x = dim_x;
	    grid.dim_y = dim_y;
	    grid.dim_z = dim_z;

	    // one start and one end index per grid cell
	    const int num_cells = dim_x*dim_y*dim_z;
	    grid.cell_starts = (int*)alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));
	    grid.cell_ends = (int*)alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));

	    // upload to device
	    HashGrid* grid_device = (HashGrid*)(alloc_device(WP_CURRENT_CONTEXT, sizeof(HashGrid)));
	    memcpy_h2d(WP_CURRENT_CONTEXT, grid_device, &grid, sizeof(HashGrid));

	    // the device address doubles as the lookup key for the host-side copy
	    uint64_t grid_id = (uint64_t)(grid_device);
	    hash_grid_add_descriptor(grid_id, grid);

	    return grid_id;
	}

	void hash_grid_destroy_device(uint64_t id)
	{
	HashGrid grid;
	if (hash_grid_get_descriptor(id, grid))
	{
	ContextGuard guard(grid.context);

	free_device(WP_CURRENT_CONTEXT, grid.point_ids);
	free_device(WP_CURRENT_CONTEXT, grid.point_cells);
	free_device(WP_CURRENT_CONTEXT, grid.cell_starts);
	free_device(WP_CURRENT_CONTEXT, grid.cell_ends);

	free_device(WP_CURRENT_CONTEXT, (HashGrid*)id);

	hash_grid_rem_descriptor(id);
	}
	}


	// Ensures the device grid identified by `id` can hold `num_points` points.
	//
	// Nothing happens if the id is unknown or the current capacity
	// (max_points) is already sufficient. Otherwise the device point buffers
	// are freed and reallocated with room for 1.5x the requested count
	// (previous contents are not preserved), the sort routine is pre-sized,
	// and both the device-side and host-side descriptors are refreshed.
	void hash_grid_reserve_device(uint64_t id, int num_points)
	{
	    HashGrid grid;

	    if (hash_grid_get_descriptor(id, grid))
	    {
	        if (num_points > grid.max_points)
	        {
	            ContextGuard guard(grid.context);

	            free_device(WP_CURRENT_CONTEXT, grid.point_cells);
	            free_device(WP_CURRENT_CONTEXT, grid.point_ids);

	            // over-allocate by 50% so that small growth does not reallocate every time
	            const int num_to_alloc = num_points*3/2;
	            grid.point_cells = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // 2 for auxiliary radix buffers
	            grid.point_ids = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // 2 for auxiliary radix buffers
	            grid.max_points = num_to_alloc;

	            // ensure we pre-size our sort routine to avoid
	            // allocations during graph capture
	            radix_sort_reserve(WP_CURRENT_CONTEXT, num_to_alloc);

	            // update device side grid descriptor, todo: this is
	            // slightly redundant since it is performed again
	            // inside hash_grid_update_device(), but since
	            // reserve can be called from Python we need to make
	            // sure it is consistent
	            memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &grid, sizeof(HashGrid));

	            // update host side grid descriptor
	            hash_grid_add_descriptor(id, grid);
	        }
	    }
	}

	// Rebuilds the device grid identified by `id` from `num_points` positions.
	//
	// Capacity is reserved first, then the descriptor is fetched, updated
	// with the point count and cell width, handed to
	// hash_grid_rebuild_device(), and finally written back to both the
	// device-side and the host-side copies. Unknown ids are ignored after
	// the reserve call.
	void hash_grid_update_device(uint64_t id, float cell_width, const wp::vec3* points, int num_points)
	{
	    // reserve has to run before the descriptor is fetched,
	    // because reserving may itself modify the stored descriptor
	    hash_grid_reserve_device(id, num_points);

	    // kept static on purpose: the host->device memcpy below reads from
	    // this variable, and it must stay valid so the copy can be safely
	    // recorded inside CUDA graphs
	    static HashGrid grid;

	    if (!hash_grid_get_descriptor(id, grid))
	        return;

	    ContextGuard guard(grid.context);

	    grid.num_points = num_points;
	    grid.cell_width = cell_width;
	    grid.cell_width_inv = 1.0f / cell_width;

	    hash_grid_rebuild_device(grid, points, num_points);

	    // push the refreshed descriptor to the device ...
	    memcpy_h2d(WP_CURRENT_CONTEXT, reinterpret_cast<HashGrid*>(id), &grid, sizeof(HashGrid));

	    // ... and keep the host-side copy in step with it
	    hash_grid_add_descriptor(id, grid);
	}

	#if !WP_ENABLE_CUDA

	namespace wp
	{

	// Stand-in used when WP_ENABLE_CUDA is off: satisfies the declaration
	// of hash_grid_rebuild_device() and intentionally does nothing.
	void hash_grid_rebuild_device(const HashGrid& /*grid*/, const wp::vec3* /*points*/, int /*num_points*/)
	{
	}

	} // namespace wp

	#endif // !WP_ENABLE_CUDA