/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
#pragma once
#include "builtin.h"
#include "intersect.h"
namespace wp
{
struct bounds3
{
CUDA_CALLABLE inline bounds3() : lower(FLT_MAX), upper(-FLT_MAX) {}
CUDA_CALLABLE inline bounds3(const vec3& lower, const vec3& upper) : lower(lower), upper(upper) {}
CUDA_CALLABLE inline vec3 center() const { return 0.5f*(lower+upper); }
CUDA_CALLABLE inline vec3 edges() const { return upper-lower; }
CUDA_CALLABLE inline void expand(float r)
{
lower -= vec3(r);
upper += vec3(r);
}
CUDA_CALLABLE inline void expand(const vec3& r)
{
lower -= r;
upper += r;
}
CUDA_CALLABLE inline bool empty() const { return lower[0] >= upper[0] || lower[1] >= upper[1] || lower[2] >= upper[2]; }
CUDA_CALLABLE inline bool overlaps(const vec3& p) const
{
if (p[0] < lower[0] ||
p[1] < lower[1] ||
p[2] < lower[2] ||
p[0] > upper[0] ||
p[1] > upper[1] ||
p[2] > upper[2])
{
return false;
}
else
{
return true;
}
}
CUDA_CALLABLE inline bool overlaps(const bounds3& b) const
{
if (lower[0] > b.upper[0] ||
lower[1] > b.upper[1] ||
lower[2] > b.upper[2] ||
upper[0] < b.lower[0] ||
upper[1] < b.lower[1] ||
upper[2] < b.lower[2])
{
return false;
}
else
{
return true;
}
}
CUDA_CALLABLE inline void add_point(const vec3& p)
{
lower = min(lower, p);
upper = max(upper, p);
}
CUDA_CALLABLE inline float area() const
{
vec3 e = upper-lower;
return 2.0f*(e[0]*e[1] + e[0]*e[2] + e[1]*e[2]);
}
vec3 lower;
vec3 upper;
};
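// illustrative sketch (not part of this header's API): a default-constructed
// bounds3 is empty (lower=FLT_MAX, upper=-FLT_MAX), so per-point accumulation
// needs no special case for the first point:
//   wp::bounds3 b;
//   for (int i=0; i < num_points; ++i)
//       b.add_point(points[i]);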
CUDA_CALLABLE inline bounds3 bounds_union(const bounds3& a, const vec3& b)
{
return bounds3(min(a.lower, b), max(a.upper, b));
}
CUDA_CALLABLE inline bounds3 bounds_union(const bounds3& a, const bounds3& b)
{
return bounds3(min(a.lower, b.lower), max(a.upper, b.upper));
}
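// note: if a and b are disjoint the result is inverted on at least one
// axis (lower > upper), which empty() reports as an empty bounds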
CUDA_CALLABLE inline bounds3 bounds_intersection(const bounds3& a, const bounds3& b)
{
return bounds3(max(a.lower, b.lower), min(a.upper, b.upper));
}
struct BVHPackedNodeHalf
{
float x;
float y;
float z;
unsigned int i : 31;
unsigned int b : 1;
};
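// a full node is the pair (node_lowers[k], node_uppers[k]): x/y/z hold the
// AABB corners and b flags a leaf; i stores the left/right child indices for
// internal nodes, or the item index (in node_lowers[k].i) for leaves, as
// consumed by the traversal loops below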
struct BVH
{
BVHPackedNodeHalf* node_lowers;
BVHPackedNodeHalf* node_uppers;
// used for fast refits
int* node_parents;
int* node_counts;
int max_depth;
int max_nodes;
int num_nodes;
// pointer (CPU or GPU) to a single integer index into node_lowers/node_uppers
// identifying the root of the tree; this is not always the first node
// for bottom-up builders
int* root;
// item bounds are not owned by the BVH but by the caller
vec3* item_lowers;
vec3* item_uppers;
int num_items;
// cuda context
void* context;
};
CUDA_CALLABLE inline BVHPackedNodeHalf make_node(const vec3& bound, int child, bool leaf)
{
BVHPackedNodeHalf n;
n.x = bound[0];
n.y = bound[1];
n.z = bound[2];
n.i = (unsigned int)child;
n.b = (unsigned int)(leaf?1:0);
return n;
}
// variant of make_node() that writes through volatile pointers; used in build_hierarchy
CUDA_CALLABLE inline void make_node(volatile BVHPackedNodeHalf* n, const vec3& bound, int child, bool leaf)
{
n->x = bound[0];
n->y = bound[1];
n->z = bound[2];
n->i = (unsigned int)child;
n->b = (unsigned int)(leaf?1:0);
}
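// software count-leading-zeros fallback; on device the __clz() intrinsic
// (or __builtin_clz() on host compilers) computes the same value for
// nonzero inputs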
CUDA_CALLABLE inline int clz(int x)
{
int n;
if (x == 0) return 32;
for (n = 0; ((x & 0x80000000) == 0); n++, x <<= 1);
return n;
}
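// spreads the lower 10 bits of n so that two zero bits separate each
// original bit, i.e. bit k moves to position 3k (Morton code helper)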
CUDA_CALLABLE inline uint32_t part1by2(uint32_t n)
{
n = (n ^ (n << 16)) & 0xff0000ff;
n = (n ^ (n << 8)) & 0x0300f00f;
n = (n ^ (n << 4)) & 0x030c30c3;
n = (n ^ (n << 2)) & 0x09249249;
return n;
}
// Takes values in the range [0, 1] and assigns an index based on Morton codes of length 3*log2(dim) bits
template <int dim>
CUDA_CALLABLE inline uint32_t morton3(float x, float y, float z)
{
uint32_t ux = clamp(int(x*dim), 0, dim-1);
uint32_t uy = clamp(int(y*dim), 0, dim-1);
uint32_t uz = clamp(int(z*dim), 0, dim-1);
return (part1by2(uz) << 2) | (part1by2(uy) << 1) | part1by2(ux);
}
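// illustrative use: for dim=1024 this yields a 30-bit key on a 1024^3 grid,
// e.g. uint32_t key = morton3<1024>(p[0], p[1], p[2]); for p in [0,1]^3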
// makes the BVH accessible from Python
CUDA_CALLABLE inline BVH bvh_get(uint64_t id)
{
return *(BVH*)(id);
}
CUDA_CALLABLE inline int bvh_get_num_bounds(uint64_t id)
{
BVH bvh = bvh_get(id);
return bvh.num_items;
}
// stores state required to traverse the BVH nodes that
// overlap with a query AABB.
struct bvh_query_t
{
CUDA_CALLABLE bvh_query_t()
{
}
CUDA_CALLABLE bvh_query_t(int)
{
} // for backward pass
BVH bvh;
// BVH traversal stack:
int stack[32];
int count;
// inputs
bool is_ray;
wp::vec3 input_lower; // start for ray
wp::vec3 input_upper; // dir for ray
int bounds_nr;
};
CUDA_CALLABLE inline bvh_query_t bvh_query(
uint64_t id, bool is_ray, const vec3& lower, const vec3& upper)
{
// This routine traverses the BVH tree until it finds
// the first overlapping bound.
// initialize empty
bvh_query_t query;
query.bounds_nr = -1;
BVH bvh = bvh_get(id);
query.bvh = bvh;
query.is_ray = is_ray;
// push the root node onto the traversal stack
query.stack[0] = *bvh.root;
query.count = 1;
query.input_lower = lower;
query.input_upper = upper;
wp::bounds3 input_bounds(query.input_lower, query.input_upper);
// Navigate through the bvh, find the first overlapping leaf node.
while (query.count)
{
const int node_index = query.stack[--query.count];
BVHPackedNodeHalf node_lower = bvh.node_lowers[node_index];
BVHPackedNodeHalf node_upper = bvh.node_uppers[node_index];
wp::vec3 lower_pos(node_lower.x, node_lower.y, node_lower.z);
wp::vec3 upper_pos(node_upper.x, node_upper.y, node_upper.z);
wp::bounds3 current_bounds(lower_pos, upper_pos);
if (query.is_ray)
{
float t = 0.0f;
if (!intersect_ray_aabb(query.input_lower, query.input_upper, current_bounds.lower, current_bounds.upper, t))
// Skip this box, it doesn't overlap with our ray.
continue;
}
else
{
if (!input_bounds.overlaps(current_bounds))
// Skip this box, it doesn't overlap with our target box.
continue;
}
const int left_index = node_lower.i;
const int right_index = node_upper.i;
if (node_lower.b)
{
// found the first overlapping leaf: push it back onto the stack
// so that the first call to bvh_query_next() returns it
query.stack[query.count++] = node_index;
return query;
}
else
{
query.stack[query.count++] = left_index;
query.stack[query.count++] = right_index;
}
}
return query;
}
CUDA_CALLABLE inline bvh_query_t bvh_query_aabb(
uint64_t id, const vec3& lower, const vec3& upper)
{
return bvh_query(id, false, lower, upper);
}
CUDA_CALLABLE inline bvh_query_t bvh_query_ray(
uint64_t id, const vec3& start, const vec3& dir)
{
return bvh_query(id, true, start, dir);
}
// stubs: adjoint (backward-pass) versions of the query constructors are no-ops
CUDA_CALLABLE inline void adj_bvh_query_aabb(uint64_t id, const vec3& lower, const vec3& upper,
uint64_t, vec3&, vec3&, bvh_query_t&)
{
}
CUDA_CALLABLE inline void adj_bvh_query_ray(uint64_t id, const vec3& start, const vec3& dir,
uint64_t, vec3&, vec3&, bvh_query_t&)
{
}
CUDA_CALLABLE inline bool bvh_query_next(bvh_query_t& query, int& index)
{
BVH bvh = query.bvh;
wp::bounds3 input_bounds(query.input_lower, query.input_upper);
// Navigate through the bvh, find the first overlapping leaf node.
while (query.count)
{
const int node_index = query.stack[--query.count];
BVHPackedNodeHalf node_lower = bvh.node_lowers[node_index];
BVHPackedNodeHalf node_upper = bvh.node_uppers[node_index];
wp::vec3 lower_pos(node_lower.x, node_lower.y, node_lower.z);
wp::vec3 upper_pos(node_upper.x, node_upper.y, node_upper.z);
wp::bounds3 current_bounds(lower_pos, upper_pos);
if (query.is_ray)
{
float t = 0.0f;
if (!intersect_ray_aabb(query.input_lower, query.input_upper, current_bounds.lower, current_bounds.upper, t))
// Skip this box, it doesn't overlap with our ray.
continue;
}
else {
if (!input_bounds.overlaps(current_bounds))
// Skip this box, it doesn't overlap with our target box.
continue;
}
const int left_index = node_lower.i;
const int right_index = node_upper.i;
if (node_lower.b)
{
// found leaf
query.bounds_nr = left_index;
index = left_index;
return true;
}
else
{
query.stack[query.count++] = left_index;
query.stack[query.count++] = right_index;
}
}
return false;
}
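// illustrative traversal sketch using the functions above (bvh_id is a
// hypothetical handle obtained from the host-side BVH builder):
//   wp::bvh_query_t q = wp::bvh_query_aabb(bvh_id, lower, upper);
//   int item;
//   while (wp::bvh_query_next(q, item))
//   {
//       // 'item' indexes into the caller-owned item_lowers/item_uppers
//   }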
CUDA_CALLABLE inline int iter_next(bvh_query_t& query)
{
return query.bounds_nr;
}
CUDA_CALLABLE inline bool iter_cmp(bvh_query_t& query)
{
// advances the query; returns true while another overlapping item exists
bool has_next = bvh_query_next(query, query.bounds_nr);
return has_next;
}
CUDA_CALLABLE inline bvh_query_t iter_reverse(const bvh_query_t& query)
{
// can't reverse BVH queries, users should not rely on traversal ordering
return query;
}
// stub
CUDA_CALLABLE inline void adj_bvh_query_next(bvh_query_t& query, int& index, bvh_query_t&, int&, bool&)
{
}
CUDA_CALLABLE bool bvh_get_descriptor(uint64_t id, BVH& bvh);
CUDA_CALLABLE void bvh_add_descriptor(uint64_t id, const BVH& bvh);
CUDA_CALLABLE void bvh_rem_descriptor(uint64_t id);
#if !__CUDA_ARCH__
void bvh_destroy_host(wp::BVH& bvh);
void bvh_refit_host(wp::BVH& bvh);
void bvh_destroy_device(wp::BVH& bvh);
void bvh_refit_device(uint64_t id);
#endif
} // namespace wp