/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved. * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation * and any modifications thereto. Any use, reproduction, disclosure or * distribution of this software and related documentation without an express * license agreement from NVIDIA CORPORATION is strictly prohibited. */ #pragma once #include "builtin.h" namespace wp { CUDA_CALLABLE inline vec3 closest_point_to_aabb(const vec3& p, const vec3& lower, const vec3& upper) { vec3 c; { float v = p[0]; if (v < lower[0]) v = lower[0]; if (v > upper[0]) v = upper[0]; c[0] = v; } { float v = p[1]; if (v < lower[1]) v = lower[1]; if (v > upper[1]) v = upper[1]; c[1] = v; } { float v = p[2]; if (v < lower[2]) v = lower[2]; if (v > upper[2]) v = upper[2]; c[2] = v; } return c; } CUDA_CALLABLE inline vec2 closest_point_to_triangle(const vec3& a, const vec3& b, const vec3& c, const vec3& p) { vec3 ab = b-a; vec3 ac = c-a; vec3 ap = p-a; float u, v, w; float d1 = dot(ab, ap); float d2 = dot(ac, ap); if (d1 <= 0.0f && d2 <= 0.0f) { v = 0.0f; w = 0.0f; u = 1.0f - v - w; return vec2(u, v); } vec3 bp = p-b; float d3 = dot(ab, bp); float d4 = dot(ac, bp); if (d3 >= 0.0f && d4 <= d3) { v = 1.0f; w = 0.0f; u = 1.0f - v - w; return vec2(u, v); } float vc = d1*d4 - d3*d2; if (vc <= 0.0f && d1 >= 0.0f && d3 <= 0.0f) { v = d1 / (d1-d3); w = 0.0f; u = 1.0f - v - w; return vec2(u, v); } vec3 cp = p-c; float d5 = dot(ab, cp); float d6 = dot(ac, cp); if (d6 >= 0.0f && d5 <= d6) { v = 0.0f; w = 1.0f; u = 1.0f - v - w; return vec2(u, v); } float vb = d5*d2 - d1*d6; if (vb <= 0.0f && d2 >= 0.0f && d6 <= 0.0f) { v = 0.0f; w = d2 / (d2 - d6); u = 1.0f - v - w; return vec2(u, v); } float va = d3*d6 - d5*d4; if (va <= 0.0f && (d4 -d3) >= 0.0f && (d5-d6) >= 0.0f) { w = (d4-d3)/((d4-d3) + (d5-d6)); v = 1.0f - w; u = 1.0f - v - w; return vec2(u, v); } float denom = 1.0f / (va + vb + vc); v = vb * denom; w = vc * denom; u = 1.0f - v - w; return vec2(u, v); } CUDA_CALLABLE inline vec2 furthest_point_to_triangle(const vec3& a, const vec3& b, const vec3& c, const vec3& p) { vec3 pa = p-a; vec3 pb = p-b; vec3 pc = p-c; float dist_a = dot(pa, pa); float dist_b = dot(pb, pb); float dist_c = dot(pc, pc); if (dist_a > dist_b && dist_a > dist_c) return vec2(1.0f, 0.0f); // a is furthest if (dist_b > dist_c) return vec2(0.0f, 1.0f); // b is furthest return vec2(0.0f, 0.0f); // c is furthest } CUDA_CALLABLE inline bool intersect_ray_aabb(const vec3& pos, const vec3& rcp_dir, const vec3& lower, const vec3& upper, float& t) { float l1, l2, lmin, lmax; l1 = (lower[0] - pos[0]) * rcp_dir[0]; l2 = (upper[0] - pos[0]) * rcp_dir[0]; lmin = min(l1,l2); lmax = max(l1,l2); l1 = (lower[1] - pos[1]) * rcp_dir[1]; l2 = (upper[1] - pos[1]) * rcp_dir[1]; lmin = max(min(l1,l2), lmin); lmax = min(max(l1,l2), lmax); l1 = (lower[2] - pos[2]) * rcp_dir[2]; l2 = (upper[2] - pos[2]) * rcp_dir[2]; lmin = max(min(l1,l2), lmin); lmax = min(max(l1,l2), lmax); bool hit = ((lmax >= 0.f) & (lmax >= lmin)); if (hit) t = lmin; return hit; } // Moller and Trumbore's method CUDA_CALLABLE inline bool intersect_ray_tri_moller(const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float& t, float& u, float& v, float& w, float& sign, vec3* normal) { vec3 ab = b - a; vec3 ac = c - a; vec3 n = cross(ab, ac); float d = dot(-dir, n); float ood = 1.0f / d; // No need to check for division by zero here as infinity arithmetic will save us... vec3 ap = p - a; t = dot(ap, n) * ood; if (t < 0.0f) return false; vec3 e = cross(-dir, ap); v = dot(ac, e) * ood; if (v < 0.0f || v > 1.0f) // ...here... return false; w = -dot(ab, e) * ood; if (w < 0.0f || (v + w) > 1.0f) // ...and here return false; u = 1.0f - v - w; if (normal) *normal = n; sign = d; return true; } CUDA_CALLABLE inline bool intersect_ray_tri_rtcd(const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float& t, float& u, float& v, float& w, float& sign, vec3* normal) { const vec3 ab = b-a; const vec3 ac = c-a; // calculate normal vec3 n = cross(ab, ac); // need to solve a system of three equations to give t, u, v float d = dot(-dir, n); // if dir is parallel to triangle plane or points away from triangle if (d <= 0.0f) return false; vec3 ap = p-a; t = dot(ap, n); // ignores tris behind if (t < 0.0f) return false; // compute barycentric coordinates vec3 e = cross(-dir, ap); v = dot(ac, e); if (v < 0.0f || v > d) return false; w = -dot(ab, e); if (w < 0.0f || v + w > d) return false; float ood = 1.0f / d; t *= ood; v *= ood; w *= ood; u = 1.0f-v-w; // optionally write out normal (todo: this branch is a performance concern, should probably remove) if (normal) *normal = n; return true; } #ifndef __CUDA_ARCH__ // these are provided as built-ins by CUDA inline float __int_as_float(int i) { return *(float*)(&i); } inline int __float_as_int(float f) { return *(int*)(&f); } #endif CUDA_CALLABLE inline float xorf(float x, int y) { return __int_as_float(__float_as_int(x) ^ y); } CUDA_CALLABLE inline int sign_mask(float x) { return __float_as_int(x) & 0x80000000; } CUDA_CALLABLE inline int max_dim(vec3 a) { float x = abs(a[0]); float y = abs(a[1]); float z = abs(a[2]); return longest_axis(vec3(x, y, z)); } // computes the difference of products a*b - c*d using // FMA instructions for improved numerical precision CUDA_CALLABLE inline float diff_product(float a, float b, float c, float d) { float cd = c * d; float diff = fmaf(a, b, -cd); float error = fmaf(-c, d, cd); return diff + error; } // http://jcgt.org/published/0002/01/05/ CUDA_CALLABLE inline bool intersect_ray_tri_woop(const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float& t, float& u, float& v, float& sign, vec3* normal) { // todo: precompute for ray int kz = max_dim(dir); int kx = kz+1; if (kx == 3) kx = 0; int ky = kx+1; if (ky == 3) ky = 0; if (dir[kz] < 0.0f) { float tmp = kx; kx = ky; ky = tmp; } float Sx = dir[kx]/dir[kz]; float Sy = dir[ky]/dir[kz]; float Sz = 1.0f/dir[kz]; // todo: end precompute const vec3 A = a-p; const vec3 B = b-p; const vec3 C = c-p; const float Ax = A[kx] - Sx*A[kz]; const float Ay = A[ky] - Sy*A[kz]; const float Bx = B[kx] - Sx*B[kz]; const float By = B[ky] - Sy*B[kz]; const float Cx = C[kx] - Sx*C[kz]; const float Cy = C[ky] - Sy*C[kz]; float U = diff_product(Cx, By, Cy, Bx); float V = diff_product(Ax, Cy, Ay, Cx); float W = diff_product(Bx, Ay, By, Ax); if (U == 0.0f || V == 0.0f || W == 0.0f) { double CxBy = (double)Cx*(double)By; double CyBx = (double)Cy*(double)Bx; U = (float)(CxBy - CyBx); double AxCy = (double)Ax*(double)Cy; double AyCx = (double)Ay*(double)Cx; V = (float)(AxCy - AyCx); double BxAy = (double)Bx*(double)Ay; double ByAx = (double)By*(double)Ax; W = (float)(BxAy - ByAx); } if ((U<0.0f || V<0.0f || W<0.0f) && (U>0.0f || V>0.0f || W>0.0f)) { return false; } float det = U+V+W; if (det == 0.0f) { return false; } const float Az = Sz*A[kz]; const float Bz = Sz*B[kz]; const float Cz = Sz*C[kz]; const float T = U*Az + V*Bz + W*Cz; int det_sign = sign_mask(det); if (xorf(T,det_sign) < 0.0f)// || xorf(T,det_sign) > hit.t * xorf(det, det_sign)) // early out if hit.t is specified { return false; } const float rcpDet = 1.0f/det; u = U*rcpDet; v = V*rcpDet; t = T*rcpDet; sign = det; // optionally write out normal (todo: this branch is a performance concern, should probably remove) if (normal) { const vec3 ab = b-a; const vec3 ac = c-a; // calculate normal *normal = cross(ab, ac); } return true; } CUDA_CALLABLE inline void adj_intersect_ray_tri_woop( const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float& t, float& u, float& v, float& sign, vec3* normal, vec3& adj_p, vec3& adj_dir, vec3& adj_a, vec3& adj_b, vec3& adj_c, float& adj_t, float& adj_u, float& adj_v, float& adj_sign, vec3* adj_normal, bool& adj_ret) { // todo: precompute for ray int kz = max_dim(dir); int kx = kz+1; if (kx == 3) kx = 0; int ky = kx+1; if (ky == 3) ky = 0; if (dir[kz] < 0.0f) { float tmp = kx; kx = ky; ky = tmp; } const float Dx = dir[kx]; const float Dy = dir[ky]; const float Dz = dir[kz]; const float Sx = dir[kx]/dir[kz]; const float Sy = dir[ky]/dir[kz]; const float Sz = 1.0f/dir[kz]; // todo: end precompute const vec3 A = a-p; const vec3 B = b-p; const vec3 C = c-p; const float Ax = A[kx] - Sx*A[kz]; const float Ay = A[ky] - Sy*A[kz]; const float Bx = B[kx] - Sx*B[kz]; const float By = B[ky] - Sy*B[kz]; const float Cx = C[kx] - Sx*C[kz]; const float Cy = C[ky] - Sy*C[kz]; float U = Cx*By - Cy*Bx; float V = Ax*Cy - Ay*Cx; float W = Bx*Ay - By*Ax; if (U == 0.0f || V == 0.0f || W == 0.0f) { double CxBy = (double)Cx*(double)By; double CyBx = (double)Cy*(double)Bx; U = (float)(CxBy - CyBx); double AxCy = (double)Ax*(double)Cy; double AyCx = (double)Ay*(double)Cx; V = (float)(AxCy - AyCx); double BxAy = (double)Bx*(double)Ay; double ByAx = (double)By*(double)Ax; W = (float)(BxAy - ByAx); } if ((U<0.0f || V<0.0f || W<0.0f) && (U>0.0f || V>0.0f || W>0.0f)) return; float det = U+V+W; if (det == 0.0f) return; const float Az = Sz*A[kz]; const float Bz = Sz*B[kz]; const float Cz = Sz*C[kz]; const float T = U*Az + V*Bz + W*Cz; int det_sign = sign_mask(det); if (xorf(T,det_sign) < 0.0f)// || xorf(T,det_sign) > hit.t * xorf(det, det_sign)) // early out if hit.t is specified return; const float rcpDet = (1.f / det); const float rcpDetSq = rcpDet * rcpDet; // adj_p const float dAx_dpx = -1.f; const float dBx_dpx = -1.f; const float dCx_dpx = -1.f; const float dAy_dpx = 0.f; const float dBy_dpx = 0.f; const float dCy_dpx = 0.f; const float dAz_dpx = 0.f; const float dBz_dpx = 0.f; const float dCz_dpx = 0.f; const float dAx_dpy = 0.f; const float dBx_dpy = 0.f; const float dCx_dpy = 0.f; const float dAy_dpy = -1.f; const float dBy_dpy = -1.f; const float dCy_dpy = -1.f; const float dAz_dpy = 0.f; const float dBz_dpy = 0.f; const float dCz_dpy = 0.f; const float dAx_dpz = Sx; const float dBx_dpz = Sx; const float dCx_dpz = Sx; const float dAy_dpz = Sy; const float dBy_dpz = Sy; const float dCy_dpz = Sy; const float dAz_dpz = -Sz; const float dBz_dpz = -Sz; const float dCz_dpz = -Sz; const float dU_dpx = Cx * dBy_dpx + By * dCx_dpx - Cy * dBx_dpx - Bx * dCy_dpx; const float dU_dpy = Cx * dBy_dpy + By * dCx_dpy - Cy * dBx_dpy - Bx * dCy_dpy; const float dU_dpz = Cx * dBy_dpz + By * dCx_dpz - Cy * dBx_dpz - Bx * dCy_dpz; const vec3 dU_dp = vec3(dU_dpx, dU_dpy, dU_dpz); const float dV_dpx = Ax * dCy_dpx + Cy * dAx_dpx - Ay * dCx_dpx - Cx * dAy_dpx; const float dV_dpy = Ax * dCy_dpy + Cy * dAx_dpy - Ay * dCx_dpy - Cx * dAy_dpy; const float dV_dpz = Ax * dCy_dpz + Cy * dAx_dpz - Ay * dCx_dpz - Cx * dAy_dpz; const vec3 dV_dp = vec3(dV_dpx, dV_dpy, dV_dpz); const float dW_dpx = Bx * dAy_dpx + Ay * dBx_dpx - By * dAx_dpx - Ax * dBy_dpx; const float dW_dpy = Bx * dAy_dpy + Ay * dBx_dpy - By * dAx_dpy - Ax * dBy_dpy; const float dW_dpz = Bx * dAy_dpz + Ay * dBx_dpz - By * dAx_dpz - Ax * dBy_dpz; const vec3 dW_dp = vec3(dW_dpx, dW_dpy, dW_dpz); const float dT_dpx = dU_dpx * Az + U * dAz_dpx + dV_dpx * Bz + V * dBz_dpx + dW_dpx * Cz + W * dCz_dpx; const float dT_dpy = dU_dpy * Az + U * dAz_dpy + dV_dpy * Bz + V * dBz_dpy + dW_dpy * Cz + W * dCz_dpy; const float dT_dpz = dU_dpz * Az + U * dAz_dpz + dV_dpz * Bz + V * dBz_dpz + dW_dpz * Cz + W * dCz_dpz; const vec3 dT_dp = vec3(dT_dpx, dT_dpy, dT_dpz); const float dDet_dpx = dU_dpx + dV_dpx + dW_dpx; const float dDet_dpy = dU_dpy + dV_dpy + dW_dpy; const float dDet_dpz = dU_dpz + dV_dpz + dW_dpz; const vec3 dDet_dp = vec3(dDet_dpx, dDet_dpy, dDet_dpz); const vec3 du_dp = rcpDet * dU_dp + -U * rcpDetSq * dDet_dp; const vec3 dv_dp = rcpDet * dV_dp + -V * rcpDetSq * dDet_dp; const vec3 dt_dp = rcpDet * dT_dp + -T * rcpDetSq * dDet_dp; vec3 adj_p_swapped = adj_u*du_dp + adj_v*dv_dp + adj_t*dt_dp; adj_p[kx] += adj_p_swapped[0]; adj_p[ky] += adj_p_swapped[1]; adj_p[kz] += adj_p_swapped[2]; // adj_dir const float dAx_dDx = -Sz * A[kz]; const float dBx_dDx = -Sz * B[kz]; const float dCx_dDx = -Sz * C[kz]; const float dAy_dDx = 0.f; const float dBy_dDx = 0.f; const float dCy_dDx = 0.f; const float dAz_dDx = 0.f; const float dBz_dDx = 0.f; const float dCz_dDx = 0.f; const float dAx_dDy = 0.f; const float dBx_dDy = 0.f; const float dCx_dDy = 0.f; const float dAy_dDy = -Sz * A[kz]; const float dBy_dDy = -Sz * B[kz]; const float dCy_dDy = -Sz * C[kz]; const float dAz_dDy = 0.f; const float dBz_dDy = 0.f; const float dCz_dDy = 0.f; const float dAx_dDz = Dx * Sz * Sz * A[kz]; const float dBx_dDz = Dx * Sz * Sz * B[kz]; const float dCx_dDz = Dx * Sz * Sz * C[kz]; const float dAy_dDz = Dy * Sz * Sz * A[kz]; const float dBy_dDz = Dy * Sz * Sz * B[kz]; const float dCy_dDz = Dy * Sz * Sz * C[kz]; const float dAz_dDz = -Sz * Sz * A[kz]; const float dBz_dDz = -Sz * Sz * B[kz]; const float dCz_dDz = -Sz * Sz * C[kz]; const float dU_dDx = Cx * dBy_dDx + By * dCx_dDx - Cy * dBx_dDx - Bx * dCy_dDx; const float dU_dDy = Cx * dBy_dDy + By * dCx_dDy - Cy * dBx_dDy - Bx * dCy_dDy; const float dU_dDz = Cx * dBy_dDz + By * dCx_dDz - Cy * dBx_dDz - Bx * dCy_dDz; const vec3 dU_dD = vec3(dU_dDx, dU_dDy, dU_dDz); const float dV_dDx = Ax * dCy_dDx + Cy * dAx_dDx - Ay * dCx_dDx - Cx * dAy_dDx; const float dV_dDy = Ax * dCy_dDy + Cy * dAx_dDy - Ay * dCx_dDy - Cx * dAy_dDy; const float dV_dDz = Ax * dCy_dDz + Cy * dAx_dDz - Ay * dCx_dDz - Cx * dAy_dDz; const vec3 dV_dD = vec3(dV_dDx, dV_dDy, dV_dDz); const float dW_dDx = Bx * dAy_dDx + Ay * dBx_dDx - By * dAx_dDx - Ax * dBy_dDx; const float dW_dDy = Bx * dAy_dDy + Ay * dBx_dDy - By * dAx_dDy - Ax * dBy_dDy; const float dW_dDz = Bx * dAy_dDz + Ay * dBx_dDz - By * dAx_dDz - Ax * dBy_dDz; const vec3 dW_dD = vec3(dW_dDx, dW_dDy, dW_dDz); const float dT_dDx = dU_dDx * Az + U * dAz_dDx + dV_dDx * Bz + V * dBz_dDx + dW_dDx * Cz + W * dCz_dDx; const float dT_dDy = dU_dDy * Az + U * dAz_dDy + dV_dDy * Bz + V * dBz_dDy + dW_dDy * Cz + W * dCz_dDy; const float dT_dDz = dU_dDz * Az + U * dAz_dDz + dV_dDz * Bz + V * dBz_dDz + dW_dDz * Cz + W * dCz_dDz; const vec3 dT_dD = vec3(dT_dDx, dT_dDy, dT_dDz); const float dDet_dDx = dU_dDx + dV_dDx + dW_dDx; const float dDet_dDy = dU_dDy + dV_dDy + dW_dDy; const float dDet_dDz = dU_dDz + dV_dDz + dW_dDz; const vec3 dDet_dD = vec3(dDet_dDx, dDet_dDy, dDet_dDz); const vec3 du_dD = rcpDet * dU_dD + -U * rcpDetSq * dDet_dD; const vec3 dv_dD = rcpDet * dV_dD + -V * rcpDetSq * dDet_dD; const vec3 dt_dD = rcpDet * dT_dD + -T * rcpDetSq * dDet_dD; vec3 adj_dir_swapped = adj_u*du_dD + adj_v*dv_dD + adj_t*dt_dD; adj_dir[kx] += adj_dir_swapped[0]; adj_dir[ky] += adj_dir_swapped[1]; adj_dir[kz] += adj_dir_swapped[2]; } // Möller's method #include "intersect_tri.h" CUDA_CALLABLE inline int intersect_tri_tri( vec3& v0, vec3& v1, vec3& v2, vec3& u0, vec3& u1, vec3& u2) { return NoDivTriTriIsect(&v0[0], &v1[0], &v2[0], &u0[0], &u1[0], &u2[0]); } CUDA_CALLABLE inline void adj_intersect_tri_tri(const vec3& var_v0, const vec3& var_v1, const vec3& var_v2, const vec3& var_u0, const vec3& var_u1, const vec3& var_u2, vec3& adj_v0, vec3& adj_v1, vec3& adj_v2, vec3& adj_u0, vec3& adj_u1, vec3& adj_u2, int adj_ret) {} CUDA_CALLABLE inline void adj_closest_point_to_triangle( const vec3& var_a, const vec3& var_b, const vec3& var_c, const vec3& var_p, vec3& adj_a, vec3& adj_b, vec3& adj_c, vec3& adj_p, vec2& adj_ret) { // primal vars vec3 var_0; vec3 var_1; vec3 var_2; float32 var_3; float32 var_4; const float32 var_5 = 0.0; bool var_6; bool var_7; bool var_8; const float32 var_9 = 1.0; vec2 var_10; vec3 var_11; float32 var_12; float32 var_13; bool var_14; bool var_15; bool var_16; vec2 var_17; vec2 var_18; float32 var_19; float32 var_20; float32 var_21; float32 var_22; float32 var_23; bool var_24; bool var_25; bool var_26; bool var_27; float32 var_28; vec2 var_29; vec2 var_30; vec3 var_31; float32 var_32; float32 var_33; bool var_34; bool var_35; bool var_36; vec2 var_37; vec2 var_38; float32 var_39; float32 var_40; float32 var_41; float32 var_42; float32 var_43; bool var_44; bool var_45; bool var_46; bool var_47; float32 var_48; vec2 var_49; vec2 var_50; float32 var_51; float32 var_52; float32 var_53; float32 var_54; float32 var_55; float32 var_56; float32 var_57; float32 var_58; bool var_59; float32 var_60; bool var_61; float32 var_62; bool var_63; bool var_64; float32 var_65; vec2 var_66; // vec2 var_67; float32 var_68; float32 var_69; float32 var_70; float32 var_71; float32 var_72; float32 var_73; float32 var_74; // vec2 var_75; //--------- // dual vars vec3 adj_0 = 0; vec3 adj_1 = 0; vec3 adj_2 = 0; float32 adj_3 = 0; float32 adj_4 = 0; float32 adj_5 = 0; //bool adj_6 = 0; //bool adj_7 = 0; //bool adj_8 = 0; float32 adj_9 = 0; vec2 adj_10 = 0; vec3 adj_11 = 0; float32 adj_12 = 0; float32 adj_13 = 0; //bool adj_14 = 0; //bool adj_15 = 0; bool adj_16 = 0; vec2 adj_17 = 0; vec2 adj_18 = 0; float32 adj_19 = 0; float32 adj_20 = 0; float32 adj_21 = 0; float32 adj_22 = 0; float32 adj_23 = 0; //bool adj_24 = 0; //bool adj_25 = 0; //bool adj_26 = 0; bool adj_27 = 0; float32 adj_28 = 0; vec2 adj_29 = 0; vec2 adj_30 = 0; vec3 adj_31 = 0; float32 adj_32 = 0; float32 adj_33 = 0; //bool adj_34 = 0; //bool adj_35 = 0; bool adj_36 = 0; vec2 adj_37 = 0; vec2 adj_38 = 0; float32 adj_39 = 0; float32 adj_40 = 0; float32 adj_41 = 0; float32 adj_42 = 0; float32 adj_43 = 0; //bool adj_44 = 0; //bool adj_45 = 0; //bool adj_46 = 0; bool adj_47 = 0; float32 adj_48 = 0; vec2 adj_49 = 0; vec2 adj_50 = 0; float32 adj_51 = 0; float32 adj_52 = 0; float32 adj_53 = 0; float32 adj_54 = 0; float32 adj_55 = 0; float32 adj_56 = 0; float32 adj_57 = 0; float32 adj_58 = 0; //bool adj_59 = 0; float32 adj_60 = 0; //bool adj_61 = 0; float32 adj_62 = 0; //bool adj_63 = 0; bool adj_64 = 0; float32 adj_65 = 0; vec2 adj_66 = 0; vec2 adj_67 = 0; float32 adj_68 = 0; float32 adj_69 = 0; float32 adj_70 = 0; float32 adj_71 = 0; float32 adj_72 = 0; float32 adj_73 = 0; float32 adj_74 = 0; vec2 adj_75 = 0; //--------- // forward var_0 = wp::sub(var_b, var_a); var_1 = wp::sub(var_c, var_a); var_2 = wp::sub(var_p, var_a); var_3 = wp::dot(var_0, var_2); var_4 = wp::dot(var_1, var_2); var_6 = (var_3 <= var_5); var_7 = (var_4 <= var_5); var_8 = var_6 && var_7; if (var_8) { var_10 = wp::vec2(var_9, var_5); goto label0; } var_11 = wp::sub(var_p, var_b); var_12 = wp::dot(var_0, var_11); var_13 = wp::dot(var_1, var_11); var_14 = (var_12 >= var_5); var_15 = (var_13 <= var_12); var_16 = var_14 && var_15; if (var_16) { var_17 = wp::vec2(var_5, var_9); goto label1; } var_18 = wp::select(var_16, var_10, var_17); var_19 = wp::mul(var_3, var_13); var_20 = wp::mul(var_12, var_4); var_21 = wp::sub(var_19, var_20); var_22 = wp::sub(var_3, var_12); var_23 = wp::div(var_3, var_22); var_24 = (var_21 <= var_5); var_25 = (var_3 >= var_5); var_26 = (var_12 <= var_5); var_27 = var_24 && var_25 && var_26; if (var_27) { var_28 = wp::sub(var_9, var_23); var_29 = wp::vec2(var_28, var_23); goto label2; } var_30 = wp::select(var_27, var_18, var_29); var_31 = wp::sub(var_p, var_c); var_32 = wp::dot(var_0, var_31); var_33 = wp::dot(var_1, var_31); var_34 = (var_33 >= var_5); var_35 = (var_32 <= var_33); var_36 = var_34 && var_35; if (var_36) { var_37 = wp::vec2(var_5, var_5); goto label3; } var_38 = wp::select(var_36, var_30, var_37); var_39 = wp::mul(var_32, var_4); var_40 = wp::mul(var_3, var_33); var_41 = wp::sub(var_39, var_40); var_42 = wp::sub(var_4, var_33); var_43 = wp::div(var_4, var_42); var_44 = (var_41 <= var_5); var_45 = (var_4 >= var_5); var_46 = (var_33 <= var_5); var_47 = var_44 && var_45 && var_46; if (var_47) { var_48 = wp::sub(var_9, var_43); var_49 = wp::vec2(var_48, var_5); goto label4; } var_50 = wp::select(var_47, var_38, var_49); var_51 = wp::mul(var_12, var_33); var_52 = wp::mul(var_32, var_13); var_53 = wp::sub(var_51, var_52); var_54 = wp::sub(var_13, var_12); var_55 = wp::sub(var_13, var_12); var_56 = wp::sub(var_32, var_33); var_57 = wp::add(var_55, var_56); var_58 = wp::div(var_54, var_57); var_59 = (var_53 <= var_5); var_60 = wp::sub(var_13, var_12); var_61 = (var_60 >= var_5); var_62 = wp::sub(var_32, var_33); var_63 = (var_62 >= var_5); var_64 = var_59 && var_61 && var_63; if (var_64) { var_65 = wp::sub(var_9, var_58); var_66 = wp::vec2(var_5, var_65); goto label5; } // var_67 = wp::select(var_64, var_50, var_66); var_68 = wp::add(var_53, var_41); var_69 = wp::add(var_68, var_21); var_70 = wp::div(var_9, var_69); var_71 = wp::mul(var_41, var_70); var_72 = wp::mul(var_21, var_70); var_73 = wp::sub(var_9, var_71); var_74 = wp::sub(var_73, var_72); // var_75 = wp::vec2(var_74, var_71); goto label6; //--------- // reverse label6:; adj_75 += adj_ret; wp::adj_vec2(var_74, var_71, adj_74, adj_71, adj_75); wp::adj_sub(var_73, var_72, adj_73, adj_72, adj_74); wp::adj_sub(var_9, var_71, adj_9, adj_71, adj_73); wp::adj_mul(var_21, var_70, adj_21, adj_70, adj_72); wp::adj_mul(var_41, var_70, adj_41, adj_70, adj_71); wp::adj_div(var_9, var_69, var_70, adj_9, adj_69, adj_70); wp::adj_add(var_68, var_21, adj_68, adj_21, adj_69); wp::adj_add(var_53, var_41, adj_53, adj_41, adj_68); wp::adj_select(var_64, var_50, var_66, adj_64, adj_50, adj_66, adj_67); if (var_64) { label5:; adj_66 += adj_ret; wp::adj_vec2(var_5, var_65, adj_5, adj_65, adj_66); wp::adj_sub(var_9, var_58, adj_9, adj_58, adj_65); } wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_62); wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_60); wp::adj_div(var_54, var_57, var_58, adj_54, adj_57, adj_58); wp::adj_add(var_55, var_56, adj_55, adj_56, adj_57); wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_56); wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_55); wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_54); wp::adj_sub(var_51, var_52, adj_51, adj_52, adj_53); wp::adj_mul(var_32, var_13, adj_32, adj_13, adj_52); wp::adj_mul(var_12, var_33, adj_12, adj_33, adj_51); wp::adj_select(var_47, var_38, var_49, adj_47, adj_38, adj_49, adj_50); if (var_47) { label4:; adj_49 += adj_ret; wp::adj_vec2(var_48, var_5, adj_48, adj_5, adj_49); wp::adj_sub(var_9, var_43, adj_9, adj_43, adj_48); } wp::adj_div(var_4, var_42, var_43, adj_4, adj_42, adj_43); wp::adj_sub(var_4, var_33, adj_4, adj_33, adj_42); wp::adj_sub(var_39, var_40, adj_39, adj_40, adj_41); wp::adj_mul(var_3, var_33, adj_3, adj_33, adj_40); wp::adj_mul(var_32, var_4, adj_32, adj_4, adj_39); wp::adj_select(var_36, var_30, var_37, adj_36, adj_30, adj_37, adj_38); if (var_36) { label3:; adj_37 += adj_ret; wp::adj_vec2(var_5, var_5, adj_5, adj_5, adj_37); } wp::adj_dot(var_1, var_31, adj_1, adj_31, adj_33); wp::adj_dot(var_0, var_31, adj_0, adj_31, adj_32); wp::adj_sub(var_p, var_c, adj_p, adj_c, adj_31); wp::adj_select(var_27, var_18, var_29, adj_27, adj_18, adj_29, adj_30); if (var_27) { label2:; adj_29 += adj_ret; wp::adj_vec2(var_28, var_23, adj_28, adj_23, adj_29); wp::adj_sub(var_9, var_23, adj_9, adj_23, adj_28); } wp::adj_div(var_3, var_22, var_23, adj_3, adj_22, adj_23); wp::adj_sub(var_3, var_12, adj_3, adj_12, adj_22); wp::adj_sub(var_19, var_20, adj_19, adj_20, adj_21); wp::adj_mul(var_12, var_4, adj_12, adj_4, adj_20); wp::adj_mul(var_3, var_13, adj_3, adj_13, adj_19); wp::adj_select(var_16, var_10, var_17, adj_16, adj_10, adj_17, adj_18); if (var_16) { label1:; adj_17 += adj_ret; wp::adj_vec2(var_5, var_9, adj_5, adj_9, adj_17); } wp::adj_dot(var_1, var_11, adj_1, adj_11, adj_13); wp::adj_dot(var_0, var_11, adj_0, adj_11, adj_12); wp::adj_sub(var_p, var_b, adj_p, adj_b, adj_11); if (var_8) { label0:; adj_10 += adj_ret; wp::adj_vec2(var_9, var_5, adj_9, adj_5, adj_10); } wp::adj_dot(var_1, var_2, adj_1, adj_2, adj_4); wp::adj_dot(var_0, var_2, adj_0, adj_2, adj_3); wp::adj_sub(var_p, var_a, adj_p, adj_a, adj_2); wp::adj_sub(var_c, var_a, adj_c, adj_a, adj_1); wp::adj_sub(var_b, var_a, adj_b, adj_a, adj_0); return; } // ---------------------------------------------------------------- // jleaf: I needed to replace "float(" with "cast_float(" manually below because // "#define float(x) cast_float(x)"" in this header affects other files. // See adjoint in "intersect_adj.h" for the generated adjoint. /* Here is the original warp implementation that was used to generate this code: # https://books.google.ca/books?id=WGpL6Sk9qNAC&printsec=frontcover&hl=en#v=onepage&q=triangle&f=false # From 5.1.9 # p1 and q1 are points of edge 1. # p2 and q2 are points of edge 2. # epsilon zero tolerance for determining if points in an edge are degenerate # output: A single wp.vec3, containing s and t for edges 1 and 2 respectively, # and the distance between their closest points. @wp.func def closest_point_edge_edge( p1: wp.vec3, q1: wp.vec3, p2: wp.vec3, q2: wp.vec3, epsilon: float ): # direction vectors of each segment/edge d1 = q1 - p1 d2 = q2 - p2 r = p1 - p2 a = wp.dot(d1, d1) # squared length of segment s1, always nonnegative e = wp.dot(d2, d2) # squared length of segment s2, always nonnegative f = wp.dot(d2, r) s = float(0.0) t = float(0.0) dist = wp.length(p2 - p1) # Check if either or both segments degenerate into points if a <= epsilon and e <= epsilon: # both segments degenerate into points return wp.vec3(s, t, dist) if a <= epsilon: s = float(0.0) t = float(f / e) # s = 0 => t = (b*s + f) / e = f / e else: c = wp.dot(d1, r) if e <= epsilon: # second segment genereates into a point s = wp.clamp(-c / a, 0.0, 1.0) # t = 0 => s = (b*t-c)/a = -c/a t = float(0.0) else: # The general nondegenerate case starts here b = wp.dot(d1, d2) denom = a * e - b * b # always nonnegative # if segments not parallel, compute closest point on L1 to L2 and # clamp to segment S1. Else pick arbitrary s (here 0) if denom != 0.0: s = wp.clamp((b * f - c * e) / denom, 0.0, 1.0) else: s = 0.0 # compute point on L2 closest to S1(s) using # t = dot((p1+d2*s) - p2,d2)/dot(d2,d2) = (b*s+f)/e t = (b * s + f) / e # if t in [0,1] done. Else clamp t, recompute s for the new value # of t using s = dot((p2+d2*t-p1,d1)/dot(d1,d1) = (t*b - c)/a # and clamp s to [0,1] if t < 0.0: t = 0.0 s = wp.clamp(-c / a, 0.0, 1.0) elif t > 1.0: t = 1.0 s = wp.clamp((b - c) / a, 0.0, 1.0) c1 = p1 + (q1 - p1) * s c2 = p2 + (q2 - p2) * t dist = wp.length(c2 - c1) return wp.vec3(s, t, dist) */ static CUDA_CALLABLE vec3 closest_point_edge_edge(vec3 var_p1, vec3 var_q1, vec3 var_p2, vec3 var_q2, float32 var_epsilon) { //--------- // primal vars vec3 var_0; vec3 var_1; vec3 var_2; float32 var_3; float32 var_4; float32 var_5; const float32 var_6 = 0.0; float32 var_7; float32 var_8; vec3 var_9; float32 var_10; bool var_11; bool var_12; bool var_13; vec3 var_14; bool var_15; float32 var_16; float32 var_17; float32 var_18; float32 var_19; float32 var_20; float32 var_21; bool var_22; float32 var_23; float32 var_24; const float32 var_25 = 1.0; float32 var_26; float32 var_27; float32 var_28; float32 var_29; float32 var_30; float32 var_31; float32 var_32; float32 var_33; bool var_34; float32 var_35; float32 var_36; float32 var_37; float32 var_38; float32 var_39; float32 var_40; float32 var_41; float32 var_42; float32 var_43; float32 var_44; bool var_45; float32 var_46; float32 var_47; float32 var_48; float32 var_49; float32 var_50; bool var_51; float32 var_52; float32 var_53; float32 var_54; float32 var_55; float32 var_56; float32 var_57; float32 var_58; float32 var_59; float32 var_60; float32 var_61; float32 var_62; vec3 var_63; vec3 var_64; vec3 var_65; vec3 var_66; vec3 var_67; vec3 var_68; vec3 var_69; float32 var_70; vec3 var_71; //--------- // forward var_0 = wp::sub(var_q1, var_p1); var_1 = wp::sub(var_q2, var_p2); var_2 = wp::sub(var_p1, var_p2); var_3 = wp::dot(var_0, var_0); var_4 = wp::dot(var_1, var_1); var_5 = wp::dot(var_1, var_2); var_7 = wp::cast_float(var_6); var_8 = wp::cast_float(var_6); var_9 = wp::sub(var_p2, var_p1); var_10 = wp::length(var_9); var_11 = (var_3 <= var_epsilon); var_12 = (var_4 <= var_epsilon); var_13 = var_11 && var_12; if (var_13) { var_14 = wp::vec3(var_7, var_8, var_10); return var_14; } var_15 = (var_3 <= var_epsilon); if (var_15) { var_16 = wp::cast_float(var_6); var_17 = wp::div(var_5, var_4); var_18 = wp::cast_float(var_17); } var_19 = wp::select(var_15, var_7, var_16); var_20 = wp::select(var_15, var_8, var_18); if (!var_15) { var_21 = wp::dot(var_0, var_2); var_22 = (var_4 <= var_epsilon); if (var_22) { var_23 = wp::neg(var_21); var_24 = wp::div(var_23, var_3); var_26 = wp::clamp(var_24, var_6, var_25); var_27 = wp::cast_float(var_6); } var_28 = wp::select(var_22, var_19, var_26); var_29 = wp::select(var_22, var_20, var_27); if (!var_22) { var_30 = wp::dot(var_0, var_1); var_31 = wp::mul(var_3, var_4); var_32 = wp::mul(var_30, var_30); var_33 = wp::sub(var_31, var_32); var_34 = (var_33 != var_6); if (var_34) { var_35 = wp::mul(var_30, var_5); var_36 = wp::mul(var_21, var_4); var_37 = wp::sub(var_35, var_36); var_38 = wp::div(var_37, var_33); var_39 = wp::clamp(var_38, var_6, var_25); } var_40 = wp::select(var_34, var_28, var_39); if (!var_34) { } var_41 = wp::select(var_34, var_6, var_40); var_42 = wp::mul(var_30, var_41); var_43 = wp::add(var_42, var_5); var_44 = wp::div(var_43, var_4); var_45 = (var_44 < var_6); if (var_45) { var_46 = wp::neg(var_21); var_47 = wp::div(var_46, var_3); var_48 = wp::clamp(var_47, var_6, var_25); } var_49 = wp::select(var_45, var_41, var_48); var_50 = wp::select(var_45, var_44, var_6); if (!var_45) { var_51 = (var_50 > var_25); if (var_51) { var_52 = wp::sub(var_30, var_21); var_53 = wp::div(var_52, var_3); var_54 = wp::clamp(var_53, var_6, var_25); } var_55 = wp::select(var_51, var_49, var_54); var_56 = wp::select(var_51, var_50, var_25); } var_57 = wp::select(var_45, var_55, var_49); var_58 = wp::select(var_45, var_56, var_50); } var_59 = wp::select(var_22, var_57, var_28); var_60 = wp::select(var_22, var_58, var_29); } var_61 = wp::select(var_15, var_59, var_19); var_62 = wp::select(var_15, var_60, var_20); var_63 = wp::sub(var_q1, var_p1); var_64 = wp::mul(var_63, var_61); var_65 = wp::add(var_p1, var_64); var_66 = wp::sub(var_q2, var_p2); var_67 = wp::mul(var_66, var_62); var_68 = wp::add(var_p2, var_67); var_69 = wp::sub(var_68, var_65); var_70 = wp::length(var_69); var_71 = wp::vec3(var_61, var_62, var_70); return var_71; } } // namespace wp