// NOTE(review): removed stray web-page scrape text (author avatar caption,
// commit message, and commit hash) that preceded the license header and is
// not valid C++.
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
# pragma once
#include "array.h"
#ifndef M_PI_F
#define M_PI_F 3.14159265358979323846f
#endif
namespace wp
{
// PCG-style output hash: one LCG multiply/add step followed by a
// randomizing xorshift permutation (RXS-M-XS variant).  Stateless:
// maps any 32-bit input to a well-mixed 32-bit output.
inline CUDA_CALLABLE uint32 rand_pcg(uint32 state)
{
    uint32 word = state * 747796405u + 2891336453u;
    uint32 mixed = ((word >> ((word >> 28u) + 4u)) ^ word) * 277803737u;
    return (mixed >> 22u) ^ mixed;
}
inline CUDA_CALLABLE uint32 rand_init(int seed) { return rand_pcg(uint32(seed)); }
inline CUDA_CALLABLE uint32 rand_init(int seed, int offset) { return rand_pcg(uint32(seed) + rand_pcg(uint32(offset))); }
inline CUDA_CALLABLE int randi(uint32& state) { state = rand_pcg(state); return int(state); }
inline CUDA_CALLABLE int randi(uint32& state, int min, int max) { state = rand_pcg(state); return state % (max - min) + min; }
inline CUDA_CALLABLE float randf(uint32& state) { state = rand_pcg(state); return (state >> 8) * (1.0f / 16777216.0f); }
inline CUDA_CALLABLE float randf(uint32& state, float min, float max) { return (max - min) * randf(state) + min; }
// Box-Muller method: standard normal sample from two uniform draws.
// NOTE(review): randf can return exactly 0, which makes log(0) = -inf
// (and the result NaN/inf) — confirm callers tolerate this rare case.
inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state))) * cos(2.f * M_PI_F * randf(state)); }
// Adjoints (reverse-mode derivatives) of the RNG primitives above.
// Random number generation has no meaningful gradient with respect to
// seeds or integer state, so all of these are intentional no-ops; they
// exist so Warp's code generator can emit a backward pass uniformly.
inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, float adj_ret) {}
inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, float adj_ret) {}
inline CUDA_CALLABLE void adj_randi(uint32& state, uint32& adj_state, float adj_ret) {}
inline CUDA_CALLABLE void adj_randi(uint32& state, int min, int max, uint32& adj_state, int& adj_min, int& adj_max, float adj_ret) {}
inline CUDA_CALLABLE void adj_randf(uint32& state, uint32& adj_state, float adj_ret) {}
inline CUDA_CALLABLE void adj_randf(uint32& state, float min, float max, uint32& adj_state, float& adj_min, float& adj_max, float adj_ret) {}
inline CUDA_CALLABLE void adj_randn(uint32& state, uint32& adj_state, float adj_ret) {}
// Inverse-CDF sampling: draw u ~ U[0, 1) and binary-search the table for
// it with lower_bound.  Presumably cdf holds a sorted, normalized
// cumulative distribution — confirm against callers.
inline CUDA_CALLABLE int sample_cdf(uint32& state, const array_t<float>& cdf)
{
    return lower_bound<float>(cdf, randf(state));
}
// Uniform barycentric coordinates (u, v) on the unit triangle using the
// square-root parameterization (consumes two uniform draws).
inline CUDA_CALLABLE vec2 sample_triangle(uint32& state)
{
    float sqrt_r = sqrt(randf(state));
    float v = randf(state) * sqrt_r;
    float u = 1.f - sqrt_r;
    return vec2(u, v);
}
// Uniform point on the unit circle: a single uniform angle in [0, 2*pi).
inline CUDA_CALLABLE vec2 sample_unit_ring(uint32& state)
{
    float angle = randf(state, 0.f, 2.f*M_PI_F);
    return vec2(cos(angle), sin(angle));
}
// Uniform point inside the unit disk: radius ~ sqrt(U) compensates for
// area growing with radius; the angle is uniform in [0, 2*pi).
inline CUDA_CALLABLE vec2 sample_unit_disk(uint32& state)
{
    float radius = sqrt(randf(state));
    float angle = randf(state, 0.f, 2.f*M_PI_F);
    return vec2(radius * cos(angle), radius * sin(angle));
}
// Uniform direction on the unit sphere surface: cos(polar) ~ U[-1, 1]
// gives an area-uniform polar angle; the azimuth is uniform in [0, 2*pi).
inline CUDA_CALLABLE vec3 sample_unit_sphere_surface(uint32& state)
{
    float polar = acos(1.f - 2.f * randf(state));
    float azimuth = randf(state, 0.f, 2.f*M_PI_F);
    float sin_polar = sin(polar);
    return vec3(cos(azimuth) * sin_polar, sin(azimuth) * sin_polar, cos(polar));
}
// Uniform point inside the unit ball: area-uniform direction (as in
// sample_unit_sphere_surface) scaled by radius ~ U^(1/3), which corrects
// for volume growing with the cube of the radius.
inline CUDA_CALLABLE vec3 sample_unit_sphere(uint32& state)
{
    float polar = acos(1.f - 2.f * randf(state));
    float azimuth = randf(state, 0.f, 2.f*M_PI_F);
    float radius = pow(randf(state), 1.f/3.f);
    float sin_polar = sin(polar);
    float x = radius * cos(azimuth) * sin_polar;
    float y = radius * sin(azimuth) * sin_polar;
    float z = radius * cos(polar);
    return vec3(x, y, z);
}
// Uniform direction on the upper (z >= 0) hemisphere: cos(polar) ~ U[0, 1]
// gives an area-uniform polar angle; the azimuth is uniform in [0, 2*pi).
inline CUDA_CALLABLE vec3 sample_unit_hemisphere_surface(uint32& state)
{
    float polar = acos(1.f - randf(state));
    float azimuth = randf(state, 0.f, 2.f*M_PI_F);
    float sin_polar = sin(polar);
    return vec3(cos(azimuth) * sin_polar, sin(azimuth) * sin_polar, cos(polar));
}
// Uniform point inside the upper (z >= 0) half-ball: hemisphere-uniform
// direction scaled by radius ~ U^(1/3) to correct for volume growth.
inline CUDA_CALLABLE vec3 sample_unit_hemisphere(uint32& state)
{
    float polar = acos(1.f - randf(state));
    float azimuth = randf(state, 0.f, 2.f*M_PI_F);
    float radius = pow(randf(state), 1.f/3.f);
    float sin_polar = sin(polar);
    float x = radius * cos(azimuth) * sin_polar;
    float y = radius * sin(azimuth) * sin_polar;
    float z = radius * cos(polar);
    return vec3(x, y, z);
}
// Uniform point in the origin-centered unit square [-0.5, 0.5)^2.
inline CUDA_CALLABLE vec2 sample_unit_square(uint32& state)
{
    float px = randf(state) - 0.5f;
    float py = randf(state) - 0.5f;
    return vec2(px, py);
}
// Uniform point in the origin-centered unit cube [-0.5, 0.5)^3.
inline CUDA_CALLABLE vec3 sample_unit_cube(uint32& state)
{
    float px = randf(state) - 0.5f;
    float py = randf(state) - 0.5f;
    float pz = randf(state) - 0.5f;
    return vec3(px, py, pz);
}
// Uniform point in the origin-centered unit 4-cube [-0.5, 0.5)^4.
inline CUDA_CALLABLE vec4 sample_unit_hypercube(uint32& state)
{
    float px = randf(state) - 0.5f;
    float py = randf(state) - 0.5f;
    float pz = randf(state) - 0.5f;
    float pw = randf(state) - 0.5f;
    return vec4(px, py, pz, pw);
}
// Adjoints of the sampling functions above.  Sampling from a fixed
// distribution carries no gradient with respect to the RNG state, so all
// of these are intentional no-ops kept for the code generator's uniform
// backward-pass interface.
inline CUDA_CALLABLE void adj_sample_cdf(uint32& state, const array_t<float>& cdf, uint32& adj_state, array_t<float>& adj_cdf, const int& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_triangle(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_unit_ring(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_unit_disk(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_unit_sphere_surface(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_unit_sphere(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_unit_hemisphere_surface(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_unit_hemisphere(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_unit_square(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_unit_cube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
/*
* log-gamma function to support some of these distributions. The
* algorithm comes from SPECFUN by Shanjie Zhang and Jianming Jin and their
* book "Computation of Special Functions", 1996, John Wiley & Sons, Inc.
*
* If random_loggam(k+1) is being used to compute log(k!) for an integer k, consider
* using logfactorial(k) instead.
*/
// log-gamma(x) via an asymptotic (Stirling-type) series, as described in
// Zhang & Jin, "Computation of Special Functions" (see comment above).
// Used by the Poisson rejection sampler's acceptance test.
inline CUDA_CALLABLE float random_loggam(float x)
{
float x0, x2, lg2pi, gl, gl0;
uint32 n;
// Coefficients of the asymptotic series (Bernoulli-number based).
const float a[10] = {8.333333333333333e-02f, -2.777777777777778e-03f,
7.936507936507937e-04f, -5.952380952380952e-04f,
8.417508417508418e-04f, -1.917526917526918e-03f,
6.410256410256410e-03f, -2.955065359477124e-02f,
1.796443723688307e-01f, -1.39243221690590e+00f};
// log(gamma(1)) = log(gamma(2)) = 0 exactly.
if ((x == 1.f) || (x == 2.f))
{
return 0.f;
}
else if (x < 7.f)
{
// The series is only accurate for large arguments; shift x up past 7
// and compensate below using gamma(x+1) = x*gamma(x).
n = uint32((7 - x));
}
else
{
n = 0;
}
x0 = x + float(n);
x2 = (1.f / x0) * (1.f / x0);
// log(2 * M_PI_F)
lg2pi = 1.8378770664093453f;
// Evaluate the series in 1/x0^2 with Horner's scheme, highest term first.
gl0 = a[9];
for (int i = 8; i >= 0; i--)
{
gl0 *= x2;
gl0 += a[i];
}
// Stirling approximation at the shifted argument x0.
gl = gl0 / x0 + 0.5f * lg2pi + (x0 - 0.5f) * log(x0) - x0;
if (x < 7.f)
{
// Undo the shift: divide out the n factors (x0-1)(x0-2)...(x) in log space.
for (uint32 k = 1; k <= n; k++)
{
gl -= log(x0 - 1.f);
x0 -= 1.f;
}
}
return gl;
}
// Poisson sample via Knuth's multiplication method: multiply uniforms
// until the running product drops to exp(-lam); the number of draws that
// stayed above it is the sample.  Expected cost is O(lam), so this is
// intended for small means (see poisson() below for the dispatch).
inline CUDA_CALLABLE uint32 random_poisson_mult(uint32& state, float lam) {
    const float threshold = exp(-lam);
    float running = 1.f;
    uint32 count = 0;
    for (;;)
    {
        running *= randf(state);
        if (running <= threshold)
        {
            return count;
        }
        count += 1;
    }
}
/*
* The transformed rejection method for generating Poisson random variables
* W. Hoermann
* Insurance: Mathematics and Economics 12, 39-45 (1993)
*/
inline CUDA_CALLABLE uint32 random_poisson(uint32& state, float lam)
{
uint32 k;
float U, V, slam, loglam, a, b, invalpha, vr, us;
slam = sqrt(lam);
loglam = log(lam);
// Constants from Hoermann's PTRS algorithm (paper cited above); valid
// for lam >= 10, which the poisson() dispatcher guarantees.
b = 0.931f + 2.53f * slam;
a = -0.059f + 0.02483f * b;
invalpha = 1.1239f + 1.1328f / (b - 3.4f);
vr = 0.9277f - 3.6224f / (b - 2.f);
while (1)
{
U = randf(state) - 0.5f;
V = randf(state);
us = 0.5f - abs(U);
// Candidate k from the transformed (U, V) pair.
// NOTE(review): the floor argument can be negative for extreme draws;
// converting a negative float to uint32 is implementation-sensitive —
// confirm against the NumPy reference, which rejects k < 0 explicitly.
k = uint32(floor((2.f * a / us + b) * U + lam + 0.43f));
// Fast acceptance region: no log-gamma evaluation needed.
if ((us >= 0.07f) && (V <= vr))
{
return k;
}
// Fast rejection region near the distribution tails.
if ((us < 0.013f) && (V > us))
{
continue;
}
// Full acceptance test: compare log of the hat function against the
// log of the Poisson pmf, using random_loggam for log(k!).
if ((log(V) + log(invalpha) - log(a / (us * us) + b)) <= (-lam + k * loglam - random_loggam(k + 1)))
{
return k;
}
}
}
/*
* Adpated from NumPy's implementation
* Warp's state variable is half the precision of NumPy's so
* poisson implementation uses half the precision used in NumPy's implementation
* both precisions appear to converge in the statistical limit
*/
// Poisson sample with mean lam.  Dispatches on the mean: lam == 0 always
// yields 0; large means (>= 10) use Hoermann's transformed-rejection
// sampler; small positive means use the simple multiplication method.
inline CUDA_CALLABLE uint32 poisson(uint32& state, float lam)
{
    if (lam == 0.f)
    {
        return 0;
    }
    if (lam >= 10.f)
    {
        return random_poisson(state, lam);
    }
    return random_poisson_mult(state, lam);
}
// Adjoints of the Poisson samplers: integer-valued sampling is not
// differentiable, so all of these are intentional no-ops.
inline CUDA_CALLABLE void adj_random_loggam(float x, float& adj_x, const float adj_ret) {}
// Bug fix: this adjoint was declared as "random_poisson_mult" (missing
// the adj_ prefix), which both shadowed the sampler's name with a bogus
// overload and broke the adj_* naming convention the code generator
// relies on for the backward pass.
inline CUDA_CALLABLE void adj_random_poisson_mult(uint32& state, float lam, uint32& adj_state, float& adj_lam, const uint32& adj_ret) {}
inline CUDA_CALLABLE void adj_random_poisson(uint32& state, float lam, uint32& adj_state, float& adj_lam, const uint32& adj_ret) {}
inline CUDA_CALLABLE void adj_poisson(uint32& state, float lam, uint32& adj_state, float& adj_lam, const uint32& adj_ret) {}
} // namespace wp