diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/.zgroup b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/.zgroup new file mode 100644 index 0000000000000000000000000000000000000000..3b7daf227c1687f28bc23b69f183e27ce9a475c1 --- /dev/null +++ b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/.zgroup b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/.zgroup new file mode 100644 index 0000000000000000000000000000000000000000..3b7daf227c1687f28bc23b69f183e27ce9a475c1 --- /dev/null +++ b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/1.0 b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..a13fd0c98da93a0ed75adf81c50a14a4e3e3ff76 Binary files /dev/null and b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/1.0 differ diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/12.0 b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/12.0 new file mode 100644 index 0000000000000000000000000000000000000000..3f11f3badc34ab4687eb6237d716d766619c0137 Binary files /dev/null and b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/12.0 differ diff --git a/Metaworld/zarr_path: 
/data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/15.0 b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/15.0 new file mode 100644 index 0000000000000000000000000000000000000000..45097370bdc51863cb9eed4e0b96f8af5716b5a7 Binary files /dev/null and b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/15.0 differ diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/16.0 b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/16.0 new file mode 100644 index 0000000000000000000000000000000000000000..dec6b43d6b882a1eb058badfd711e308da67e6af Binary files /dev/null and b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/16.0 differ diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/19.0 b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/19.0 new file mode 100644 index 0000000000000000000000000000000000000000..485ecf836383ef7fafa59b5b9c125e15b7e0c360 Binary files /dev/null and b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/19.0 differ diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/2.0 b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/2.0 new file mode 100644 index 0000000000000000000000000000000000000000..b51b93f0aeda1f918e86e2fc59785c5bc3942983 Binary files /dev/null and b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/2.0 differ diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/4.0 b/Metaworld/zarr_path: 
/data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/4.0 new file mode 100644 index 0000000000000000000000000000000000000000..9de28b9840d56ca35e0437b127c9b27af04fe54f Binary files /dev/null and b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/4.0 differ diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/8.0 b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/8.0 new file mode 100644 index 0000000000000000000000000000000000000000..256f46821b5c3a4ffeae50c12d5c088c89d84ccb Binary files /dev/null and b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/data/full_state/8.0 differ diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/meta/.zgroup b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/meta/.zgroup new file mode 100644 index 0000000000000000000000000000000000000000..3b7daf227c1687f28bc23b69f183e27ce9a475c1 --- /dev/null +++ b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/meta/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/meta/episode_ends/.zarray b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/meta/episode_ends/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..60914cf77bdc3cc1ad411eea64d76bf1f23dfa50 --- /dev/null +++ b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_door-unlock_expert.zarr/meta/episode_ends/.zarray @@ -0,0 +1,20 @@ +{ + "chunks": [ + 10 + ], + "compressor": { + "blocksize": 0, + "clevel": 3, + "cname": "zstd", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " +#include "sample_farthest_points/sample_farthest_points.h" 
+ + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + + m.def("sample_farthest_points", &FarthestPointSampling); +} diff --git a/pytorch3d_simplified/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h b/pytorch3d_simplified/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h new file mode 100644 index 0000000000000000000000000000000000000000..7b613d358880936863c2a56b82dee77d93d777f9 --- /dev/null +++ b/pytorch3d_simplified/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once +#include +#include +#include "utils/pytorch3d_cutils.h" + +// Iterative farthest point sampling algorithm [1] to subsample a set of +// K points from a given pointcloud. At each iteration, a point is selected +// which has the largest nearest neighbor distance to any of the +// already selected points. + +// Farthest point sampling provides more uniform coverage of the input +// point cloud compared to uniform random sampling. + +// [1] Charles R. Qi et al, "PointNet++: Deep Hierarchical Feature Learning +// on Point Sets in a Metric Space", NeurIPS 2017. + +// Args: +// points: (N, P, D) float32 Tensor containing the batch of pointclouds. +// lengths: (N,) long Tensor giving the number of points in each pointcloud +// (to support heterogeneous batches of pointclouds). +// K: a tensor of length (N,) giving the number of +// samples to select for each element in the batch. +// The number of samples is typically << P. +// start_idxs: (N,) long Tensor giving the index of the first point to +// sample. Default is all 0. When a random start point is required, +// start_idxs should be set to a random value in [0, lengths[n] - 1] +// for batch element n. 
+// Returns: +// selected_indices: (N, K) array of selected indices. If the values in +// K are not all the same, then the shape will be (N, max(K)), and +// padded with -1 for batch elements where k_i < max(K). The selected +// points are gathered in the pytorch autograd wrapper. + +at::Tensor FarthestPointSamplingCuda( + const at::Tensor& points, + const at::Tensor& lengths, + const at::Tensor& K, + const at::Tensor& start_idxs); + +at::Tensor FarthestPointSamplingCpu( + const at::Tensor& points, + const at::Tensor& lengths, + const at::Tensor& K, + const at::Tensor& start_idxs); + +// Exposed implementation. Takes the CUDA path when any input tensor is on the +// GPU (CHECK_CUDA is then applied to all four inputs) and the CPU path +// otherwise. Declared inline because it is defined in a header, so including +// the header from several translation units does not produce duplicate symbols. +inline at::Tensor FarthestPointSampling( + const at::Tensor& points, + const at::Tensor& lengths, + const at::Tensor& K, + const at::Tensor& start_idxs) { + // start_idxs is part of the check so that a GPU start_idxs is never handed + // to the CPU implementation. + if (points.is_cuda() || lengths.is_cuda() || K.is_cuda() || + start_idxs.is_cuda()) { +#ifdef WITH_CUDA + CHECK_CUDA(points); + CHECK_CUDA(lengths); + CHECK_CUDA(K); + CHECK_CUDA(start_idxs); + return FarthestPointSamplingCuda(points, lengths, K, start_idxs); +#else + AT_ERROR("Not compiled with GPU support."); +#endif + } + return FarthestPointSamplingCpu(points, lengths, K, start_idxs); +} diff --git a/pytorch3d_simplified/pytorch3d/csrc/sample_farthest_points/sample_farthest_points_cpu.cpp b/pytorch3d_simplified/pytorch3d/csrc/sample_farthest_points/sample_farthest_points_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cd533825f4da75e232bc493c28a0872e477d6db7 --- /dev/null +++ b/pytorch3d_simplified/pytorch3d/csrc/sample_farthest_points/sample_farthest_points_cpu.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include + +// CPU implementation of iterative farthest point sampling. +// For each batch element n, starts from start_idxs[n] and repeatedly picks the +// point whose squared distance to the closest already-selected point is +// largest, until min(lengths[n], K[n]) indices have been written. +// Returns an (N, max(K)) index tensor; unused entries stay at -1. +at::Tensor FarthestPointSamplingCpu( + const at::Tensor& points, + const at::Tensor& lengths, + const at::Tensor& K, + const at::Tensor& start_idxs) { + // Get constants + const int64_t N = points.size(0); + const int64_t P = points.size(1); + const int64_t D = points.size(2); + const int64_t max_K = torch::max(K).item(); + + // Initialize an output array for the sampled indices + // of shape (N, max_K) + auto long_opts = lengths.options(); + torch::Tensor sampled_indices = torch::full({N, max_K}, -1, long_opts); + + // Create accessors for all tensors + auto points_a = points.accessor(); + auto lengths_a = lengths.accessor(); + auto k_a = K.accessor(); + auto sampled_indices_a = sampled_indices.accessor(); + auto start_idxs_a = start_idxs.accessor(); + + // Initialize a mask to prevent duplicates + // If true, the point has already been selected. + std::vector selected_points_mask(P, false); + + // Initialize to infinity a vector of + // distances from each point to any of the previously selected points + std::vector dists(P, std::numeric_limits::max()); + + for (int64_t n = 0; n < N; ++n) { + // For heterogeneous pointclouds, use the minimum of the + // length for that cloud compared to K as the number of + // points to sample + const int64_t batch_k = std::min(lengths_a[n], k_a[n]); + + // Nothing to sample for an empty cloud or when no samples are requested: + // leave the row padded with -1 instead of writing out of bounds below. + if (batch_k <= 0) { + continue; + } + + // Resize and reset points mask and distances for each batch + selected_points_mask.resize(lengths_a[n]); + dists.resize(lengths_a[n]); + std::fill(selected_points_mask.begin(), selected_points_mask.end(), false); + std::fill(dists.begin(), dists.end(), std::numeric_limits::max()); + + // Get the starting point index and save it + int64_t last_idx = start_idxs_a[n]; + sampled_indices_a[n][0] = last_idx; + + // Mark the starting point as already selected + selected_points_mask[last_idx] = true; + + // Iteratively select batch_k points per batch + for (int64_t k = 1; k < batch_k; ++k) { + // Iterate through all the points + for 
(int64_t p = 0; p < lengths_a[n]; ++p) { + if (selected_points_mask[p]) { + // For already selected points set the distance to 0.0 + dists[p] = 0.0; + continue; + } + + // Calculate the distance to the last selected point + float dist2 = 0.0; + for (int64_t d = 0; d < D; ++d) { + float diff = points_a[n][last_idx][d] - points_a[n][p][d]; + dist2 += diff * diff; + } + + // If the distance of this point to the last selected point is closer + // than the distance to any of the previously selected points, then + // update this distance + if (dist2 < dists[p]) { + dists[p] = dist2; + } + } + + // The aim is to pick the point that has the largest + // nearest neighbour distance to any of the already selected points + auto itr = std::max_element(dists.begin(), dists.end()); + last_idx = std::distance(dists.begin(), itr); + + // Save selected point + sampled_indices_a[n][k] = last_idx; + + // Set the mask value to true to prevent duplicates. + selected_points_mask[last_idx] = true; + } + } + + return sampled_indices; +} diff --git a/pytorch3d_simplified/pytorch3d/csrc/utils/dispatch.cuh b/pytorch3d_simplified/pytorch3d/csrc/utils/dispatch.cuh new file mode 100644 index 0000000000000000000000000000000000000000..83f3d69ff40907c396e3d175402d5cf4561142b5 --- /dev/null +++ b/pytorch3d_simplified/pytorch3d/csrc/utils/dispatch.cuh @@ -0,0 +1,357 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +// This file provides utilities for dispatching to specialized versions of +// functions. This is especially useful for CUDA kernels, since specializing +// them to particular input sizes can often allow the compiler to unroll loops +// and place arrays into registers, which can give huge performance speedups. 
+// +// As an example, suppose we have the following function which is specialized +// based on a compile-time int64_t value: +// +// template<typename T, int64_t x> +// struct SquareOffset { +// static void run(T y) { +// T val = x * x + y; +// std::cout << val << std::endl; +// } +// } +// +// This function takes one compile-time argument x, and one run-time argument y. +// We might want to compile specialized versions of this for x=0, x=1, etc and +// then dispatch to the correct one based on the runtime value of x. +// One simple way to achieve this is with a lookup table: +// +// template<typename T> +// void DispatchSquareOffset(const int64_t x, T y) { +// if (x == 0) { +// SquareOffset<T, 0>::run(y); +// } else if (x == 1) { +// SquareOffset<T, 1>::run(y); +// } else if (x == 2) { +// SquareOffset<T, 2>::run(y); +// } +// } +// +// This function takes both x and y as run-time arguments, and dispatches to +// different specialized versions of SquareOffset based on the run-time value +// of x. This works, but it's tedious and error-prone. If we want to change the +// set of x values for which we provide compile-time specializations, then we +// will need to do a lot of tedious editing of the dispatch function. Also, if we +// want to provide compile-time specializations for another function other than +// SquareOffset, we will need to duplicate the entire lookup table. +// +// To solve these problems, we can use the DispatchKernel1D function provided by +// this file instead: +// +// template<typename T> +// void DispatchSquareOffset(const int64_t x, T y) { +// constexpr int64_t xmin = 0; +// constexpr int64_t xmax = 2; +// DispatchKernel1D<SquareOffset, T, xmin, xmax>(x, y); +// } +// +// DispatchKernel1D uses template metaprogramming to compile specialized +// versions of SquareOffset for all values of x with xmin <= x <= xmax, and +// then dispatches to the correct one based on the run-time value of x. 
If we +// want to change the range of x values for which SquareOffset is specialized +// at compile-time, then all we have to do is change the values of the +// compile-time constants xmin and xmax. +// +// This file also allows us to similarly dispatch functions that depend on two +// compile-time int64_t values, using the DispatchKernel2D function like this: +// +// template<typename T, int64_t x, int64_t y> +// struct Sum { +// static void run(T z, T w) { +// T val = x + y + z + w; +// std::cout << val << std::endl; +// } +// } +// +// template<typename T> +// void DispatchSum(const int64_t x, const int64_t y, int z, int w) { +// constexpr int64_t xmin = 1; +// constexpr int64_t xmax = 3; +// constexpr int64_t ymin = 2; +// constexpr int64_t ymax = 5; +// DispatchKernel2D<Sum, T, xmin, xmax, ymin, ymax>(x, y, z, w); +// } +// +// Like its 1D counterpart, DispatchKernel2D uses template metaprogramming to +// compile specialized versions of Sum for all values of (x, y) with +// xmin <= x <= xmax and ymin <= y <= ymax, then dispatches to the correct +// specialized version based on the runtime values of x and y. + +// Define some helper structs in an anonymous namespace. +namespace { + +// 1D dispatch: general case. +// Kernel is the function we want to dispatch to; it should take a typename and +// an int64_t as template args, and it should define a static void function +// run which takes any number of arguments of any type. +// In order to dispatch, we will take an additional template argument curN, +// and increment it via template recursion until it is equal to the run-time +// argument N. +template < + template + class Kernel, + typename T, + int64_t minN, + int64_t maxN, + int64_t curN, + typename... Args> +struct DispatchKernelHelper1D { + static void run(const int64_t N, Args... args) { + if (curN == N) { + // The compile-time value curN is equal to the run-time value N, so we + // can dispatch to the run method of the Kernel. 
+ Kernel::run(args...); + } else if (curN < N) { + // Increment curN via template recursion + DispatchKernelHelper1D::run( + N, args...); + } + // We shouldn't get here -- throw an error? + } +}; + +// 1D dispatch: Specialization when curN == maxN +// We need this base case to avoid infinite template recursion. +template < + template + class Kernel, + typename T, + int64_t minN, + int64_t maxN, + typename... Args> +struct DispatchKernelHelper1D { + static void run(const int64_t N, Args... args) { + if (N == maxN) { + Kernel::run(args...); + } + // We shouldn't get here -- throw an error? + } +}; + +// 2D dispatch, general case. +// This is similar to the 1D case: we take additional template args curN and +// curM, and increment them via template recursion until they are equal to +// the run-time values of N and M, at which point we dispatch to the run +// method of the kernel. +template < + template + class Kernel, + typename T, + int64_t minN, + int64_t maxN, + int64_t curN, + int64_t minM, + int64_t maxM, + int64_t curM, + typename... Args> +struct DispatchKernelHelper2D { + static void run(const int64_t N, const int64_t M, Args... args) { + if (curN == N && curM == M) { + Kernel::run(args...); + } else if (curN < N && curM < M) { + // Increment both curN and curM. This isn't strictly necessary; we could + // just increment one or the other at each step. But this helps to cut + // down on the number of recursive calls we make. 
+ DispatchKernelHelper2D< + Kernel, + T, + minN, + maxN, + curN + 1, + minM, + maxM, + curM + 1, + Args...>::run(N, M, args...); + } else if (curN < N) { + // Increment curN only + DispatchKernelHelper2D< + Kernel, + T, + minN, + maxN, + curN + 1, + minM, + maxM, + curM, + Args...>::run(N, M, args...); + } else if (curM < M) { + // Increment curM only + DispatchKernelHelper2D< + Kernel, + T, + minN, + maxN, + curN, + minM, + maxM, + curM + 1, + Args...>::run(N, M, args...); + } + } +}; + +// 2D dispatch, specialization for curN == maxN +template < + template + class Kernel, + typename T, + int64_t minN, + int64_t maxN, + int64_t minM, + int64_t maxM, + int64_t curM, + typename... Args> +struct DispatchKernelHelper2D< + Kernel, + T, + minN, + maxN, + maxN, + minM, + maxM, + curM, + Args...> { + static void run(const int64_t N, const int64_t M, Args... args) { + if (maxN == N && curM == M) { + Kernel::run(args...); + } else if (curM < maxM) { + DispatchKernelHelper2D< + Kernel, + T, + minN, + maxN, + maxN, + minM, + maxM, + curM + 1, + Args...>::run(N, M, args...); + } + // We should not get here -- throw an error? + } +}; + +// 2D dispatch, specialization for curM == maxM +template < + template + class Kernel, + typename T, + int64_t minN, + int64_t maxN, + int64_t curN, + int64_t minM, + int64_t maxM, + typename... Args> +struct DispatchKernelHelper2D< + Kernel, + T, + minN, + maxN, + curN, + minM, + maxM, + maxM, + Args...> { + static void run(const int64_t N, const int64_t M, Args... args) { + if (curN == N && maxM == M) { + Kernel::run(args...); + } else if (curN < maxN) { + DispatchKernelHelper2D< + Kernel, + T, + minN, + maxN, + curN + 1, + minM, + maxM, + maxM, + Args...>::run(N, M, args...); + } + // We should not get here -- throw an error? + } +}; + +// 2D dispatch, specialization for curN == maxN, curM == maxM +template < + template + class Kernel, + typename T, + int64_t minN, + int64_t maxN, + int64_t minM, + int64_t maxM, + typename... 
Args> +struct DispatchKernelHelper2D< + Kernel, + T, + minN, + maxN, + maxN, + minM, + maxM, + maxM, + Args...> { + static void run(const int64_t N, const int64_t M, Args... args) { + if (maxN == N && maxM == M) { + Kernel::run(args...); + } + // We should not get here -- throw an error? + } +}; + +} // namespace + +// This is the function we expect users to call to dispatch to 1D functions +template < + template + class Kernel, + typename T, + int64_t minN, + int64_t maxN, + typename... Args> +void DispatchKernel1D(const int64_t N, Args... args) { + if (minN <= N && N <= maxN) { + // Kick off the template recursion by calling the Helper with curN = minN + DispatchKernelHelper1D::run( + N, args...); + } + // NOTE: when N is outside [minN, maxN] nothing is dispatched and no error is + // reported. Maybe throw an error if we tried to dispatch outside the allowed + // range? +} + +// This is the function we expect users to call to dispatch to 2D functions +template < + template + class Kernel, + typename T, + int64_t minN, + int64_t maxN, + int64_t minM, + int64_t maxM, + typename... Args> +void DispatchKernel2D(const int64_t N, const int64_t M, Args... args) { + if (minN <= N && N <= maxN && minM <= M && M <= maxM) { + // Kick off the template recursion by calling the Helper with curN = minN + // and curM = minM + DispatchKernelHelper2D< + Kernel, + T, + minN, + maxN, + minN, + minM, + maxM, + minM, + Args...>::run(N, M, args...); + } + // NOTE: when (N, M) is outside the allowed ranges nothing is dispatched and + // no error is reported. Maybe throw an error if we tried to dispatch outside + // the specified range? +} diff --git a/pytorch3d_simplified/pytorch3d/csrc/utils/float_math.cuh b/pytorch3d_simplified/pytorch3d/csrc/utils/float_math.cuh new file mode 100644 index 0000000000000000000000000000000000000000..e48e960e96544fd901655ce1d0217513d300187b --- /dev/null +++ b/pytorch3d_simplified/pytorch3d/csrc/utils/float_math.cuh @@ -0,0 +1,153 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once +#include + +// Small constant added to vector norms (see normalize) so that dividing by the +// norm of a zero vector does not divide by zero. +// NOTE(review): this is a float literal under MSVC but a double constant +// otherwise, so expressions using it are evaluated in double on non-MSVC +// builds -- confirm whether that is intended. +#ifdef _MSC_VER +#define vEpsilon 1e-8f +#else +const auto vEpsilon = 1e-8; +#endif + +// Common functions and operators for float2. + +__device__ inline float2 operator-(const float2& a, const float2& b) { + return make_float2(a.x - b.x, a.y - b.y); +} + +__device__ inline float2 operator+(const float2& a, const float2& b) { + return make_float2(a.x + b.x, a.y + b.y); +} + +__device__ inline float2 operator/(const float2& a, const float2& b) { + return make_float2(a.x / b.x, a.y / b.y); +} + +__device__ inline float2 operator/(const float2& a, const float b) { + return make_float2(a.x / b, a.y / b); +} + +__device__ inline float2 operator*(const float2& a, const float2& b) { + return make_float2(a.x * b.x, a.y * b.y); +} + +__device__ inline float2 operator*(const float a, const float2& b) { + return make_float2(a * b.x, a * b.y); +} + +__device__ inline float FloatMin3(const float a, const float b, const float c) { + return fminf(a, fminf(b, c)); +} + +__device__ inline float FloatMax3(const float a, const float b, const float c) { + return fmaxf(a, fmaxf(b, c)); +} + +__device__ inline float dot(const float2& a, const float2& b) { + return a.x * b.x + a.y * b.y; +} + +// Backward pass for the dot product. +// Args: +// a, b: Coordinates of two points. +// grad_dot: Upstream gradient for the output. +// +// Returns: +// tuple of gradients for each of the input points: +// (float2 grad_a, float2 grad_b) +// +__device__ inline thrust::tuple +DotBackward(const float2& a, const float2& b, const float& grad_dot) { + return thrust::make_tuple(grad_dot * b, grad_dot * a); +} + +__device__ inline float sum(const float2& a) { + return a.x + a.y; +} + +// Common functions and operators for float3. 
+ +__device__ inline float3 operator-(const float3& a, const float3& b) { + return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); +} + +__device__ inline float3 operator+(const float3& a, const float3& b) { + return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); +} + +__device__ inline float3 operator/(const float3& a, const float3& b) { + return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); +} + +__device__ inline float3 operator/(const float3& a, const float b) { + return make_float3(a.x / b, a.y / b, a.z / b); +} + +__device__ inline float3 operator*(const float3& a, const float3& b) { + return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); +} + +__device__ inline float3 operator*(const float a, const float3& b) { + return make_float3(a * b.x, a * b.y, a * b.z); +} + +__device__ inline float dot(const float3& a, const float3& b) { + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +__device__ inline float sum(const float3& a) { + return a.x + a.y + a.z; +} + +__device__ inline float3 cross(const float3& a, const float3& b) { + return make_float3( + a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); +} + +// Backward pass for the cross product. +// Args: +// a, b: The two input vectors. +// grad_cross: Upstream gradient for the output. +// +// Returns: +// tuple of gradients for each of the inputs: +// (float3 grad_a, float3 grad_b) +// +__device__ inline thrust::tuple +cross_backward(const float3& a, const float3& b, const float3& grad_cross) { + const float grad_ax = -grad_cross.y * b.z + grad_cross.z * b.y; + const float grad_ay = grad_cross.x * b.z - grad_cross.z * b.x; + const float grad_az = -grad_cross.x * b.y + grad_cross.y * b.x; + const float3 grad_a = make_float3(grad_ax, grad_ay, grad_az); + + const float grad_bx = grad_cross.y * a.z - grad_cross.z * a.y; + const float grad_by = -grad_cross.x * a.z + grad_cross.z * a.x; + const float grad_bz = grad_cross.x * a.y - grad_cross.y * a.x; + const float3 grad_b = make_float3(grad_bx, grad_by, grad_bz); + + return thrust::make_tuple(grad_a, grad_b); +} + +__device__ inline float norm(const float3& a) { + return sqrt(dot(a, a)); +} + +// Divides a by (norm(a) + vEpsilon); the epsilon keeps the divisor non-zero +// for a zero-length input. +__device__ inline float3 normalize(const float3& a) { + return a / (norm(a) + vEpsilon); +} + 
+// Backward pass for normalize. +// Args: +// a: The input vector that was normalized. +// grad_normz: Upstream gradient for the normalized output. +// +// Returns: +// Gradient with respect to a. +// +__device__ inline float3 normalize_backward( + const float3& a, + const float3& grad_normz) { + const float a_norm = norm(a) + vEpsilon; + const float3 out = a / a_norm; + + const float grad_ax = grad_normz.x * (1.0f - out.x * out.x) / a_norm + + grad_normz.y * (-out.x * out.y) / a_norm + + grad_normz.z * (-out.x * out.z) / a_norm; + const float grad_ay = grad_normz.x * (-out.x * out.y) / a_norm + + grad_normz.y * (1.0f - out.y * out.y) / a_norm + + grad_normz.z * (-out.y * out.z) / a_norm; + const float grad_az = grad_normz.x * (-out.x * out.z) / a_norm + + grad_normz.y * (-out.y * out.z) / a_norm + + grad_normz.z * (1.0f - out.z * out.z) / a_norm; + return make_float3(grad_ax, grad_ay, grad_az); +} diff --git a/pytorch3d_simplified/pytorch3d/csrc/utils/geometry_utils.cuh b/pytorch3d_simplified/pytorch3d/csrc/utils/geometry_utils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..66aee7fc7bcd3495bc7dbba56d89995d383b655e --- /dev/null +++ b/pytorch3d_simplified/pytorch3d/csrc/utils/geometry_utils.cuh @@ -0,0 +1,792 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include "float_math.cuh" + +// Set epsilon for preventing floating point errors and division by 0. +#ifdef _MSC_VER +#define kEpsilon 1e-8f +#else +const auto kEpsilon = 1e-8; +#endif + +// ************************************************************* // +// vec2 utils // +// ************************************************************* // + +// Determines whether a point p is on the right side of a 2D line segment +// given by the end points v0, v1. +// +// Args: +// p: vec2 Coordinates of a point. +// v0, v1: vec2 Coordinates of the end points of the edge. 
+// +// Returns: +// area: The signed area of the parallelogram given by the vectors +// A = p - v0 +// B = v1 - v0 +// +__device__ inline float +EdgeFunctionForward(const float2& p, const float2& v0, const float2& v1) { + return (p.x - v0.x) * (v1.y - v0.y) - (p.y - v0.y) * (v1.x - v0.x); +} + +// Backward pass for the edge function returning partial derivatives for each +// of the input points. +// +// Args: +// p: vec2 Coordinates of a point. +// v0, v1: vec2 Coordinates of the end points of the edge. +// grad_edge: Upstream gradient for output from edge function. +// +// Returns: +// tuple of gradients for each of the input points: +// (float2 d_edge_dp, float2 d_edge_dv0, float2 d_edge_dv1) +// +__device__ inline thrust::tuple EdgeFunctionBackward( + const float2& p, + const float2& v0, + const float2& v1, + const float& grad_edge) { + const float2 dedge_dp = make_float2(v1.y - v0.y, v0.x - v1.x); + const float2 dedge_dv0 = make_float2(p.y - v1.y, v1.x - p.x); + const float2 dedge_dv1 = make_float2(v0.y - p.y, p.x - v0.x); + return thrust::make_tuple( + grad_edge * dedge_dp, grad_edge * dedge_dv0, grad_edge * dedge_dv1); +} + +// The forward pass for computing the barycentric coordinates of a point +// relative to a triangle. +// +// Args: +// p: Coordinates of a point. +// v0, v1, v2: Coordinates of the triangle vertices. +// +// Returns +// bary: (w0, w1, w2) barycentric coordinates. The values are not clamped, +// so they lie in the range [0, 1] only when p is inside the triangle. +// +__device__ inline float3 BarycentricCoordsForward( + const float2& p, + const float2& v0, + const float2& v1, + const float2& v2) { + const float area = EdgeFunctionForward(v2, v0, v1) + kEpsilon; + const float w0 = EdgeFunctionForward(p, v1, v2) / area; + const float w1 = EdgeFunctionForward(p, v2, v0) / area; + const float w2 = EdgeFunctionForward(p, v0, v1) / area; + return make_float3(w0, w1, w2); +} + +// The backward pass for computing the barycentric coordinates of a point +// relative to a triangle. +// +// Args: +// p: Coordinates of a point. 
+// v0, v1, v2: (x, y) coordinates of the triangle vertices. +// grad_bary_upstream: vec3 Upstream gradient for each of the +// barycentric coordinates [grad_w0, grad_w1, grad_w2]. +// +// Returns +// tuple of gradients for the point and each of the triangle vertices: +// (float2 grad_p, float2 grad_v0, float2 grad_v1, float2 grad_v2) +// +__device__ inline thrust::tuple +BarycentricCoordsBackward( + const float2& p, + const float2& v0, + const float2& v1, + const float2& v2, + const float3& grad_bary_upstream) { + const float area = EdgeFunctionForward(v2, v0, v1) + kEpsilon; + // Plain multiplication instead of a general pow() call for the square. + const float area2 = area * area; + const float e0 = EdgeFunctionForward(p, v1, v2); + const float e1 = EdgeFunctionForward(p, v2, v0); + const float e2 = EdgeFunctionForward(p, v0, v1); + + const float grad_w0 = grad_bary_upstream.x; + const float grad_w1 = grad_bary_upstream.y; + const float grad_w2 = grad_bary_upstream.z; + + // Calculate component of the gradient from each of w0, w1 and w2. + // e.g. for w0: + // dloss/dw0_v = dl/dw0 * dw0/dw0_top * dw0_top/dv + // + dl/dw0 * dw0/dw0_bot * dw0_bot/dv + const float dw0_darea = -e0 / (area2); + const float dw0_e0 = 1 / area; + const float dloss_d_w0area = grad_w0 * dw0_darea; + const float dloss_e0 = grad_w0 * dw0_e0; + auto de0_dv = EdgeFunctionBackward(p, v1, v2, dloss_e0); + auto dw0area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w0area); + const float2 dw0_p = thrust::get<0>(de0_dv); + const float2 dw0_dv0 = thrust::get<1>(dw0area_dv); + const float2 dw0_dv1 = thrust::get<1>(de0_dv) + thrust::get<2>(dw0area_dv); + const float2 dw0_dv2 = thrust::get<2>(de0_dv) + thrust::get<0>(dw0area_dv); + + const float dw1_darea = -e1 / (area2); + const float dw1_e1 = 1 / area; + const float dloss_d_w1area = grad_w1 * dw1_darea; + const float dloss_e1 = grad_w1 * dw1_e1; + auto de1_dv = EdgeFunctionBackward(p, v2, v0, dloss_e1); + auto dw1area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w1area); + const float2 dw1_p = thrust::get<0>(de1_dv); + const float2 dw1_dv0 = 
thrust::get<2>(de1_dv) + thrust::get<1>(dw1area_dv); + const float2 dw1_dv1 = thrust::get<2>(dw1area_dv); + const float2 dw1_dv2 = thrust::get<1>(de1_dv) + thrust::get<0>(dw1area_dv); + + const float dw2_darea = -e2 / (area2); + const float dw2_e2 = 1 / area; + const float dloss_d_w2area = grad_w2 * dw2_darea; + const float dloss_e2 = grad_w2 * dw2_e2; + auto de2_dv = EdgeFunctionBackward(p, v0, v1, dloss_e2); + auto dw2area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w2area); + const float2 dw2_p = thrust::get<0>(de2_dv); + const float2 dw2_dv0 = thrust::get<1>(de2_dv) + thrust::get<1>(dw2area_dv); + const float2 dw2_dv1 = thrust::get<2>(de2_dv) + thrust::get<2>(dw2area_dv); + const float2 dw2_dv2 = thrust::get<0>(dw2area_dv); + + const float2 dbary_p = dw0_p + dw1_p + dw2_p; + const float2 dbary_dv0 = dw0_dv0 + dw1_dv0 + dw2_dv0; + const float2 dbary_dv1 = dw0_dv1 + dw1_dv1 + dw2_dv1; + const float2 dbary_dv2 = dw0_dv2 + dw1_dv2 + dw2_dv2; + + return thrust::make_tuple(dbary_p, dbary_dv0, dbary_dv1, dbary_dv2); +} + +// Forward pass for applying perspective correction to barycentric coordinates. +// +// Args: +// bary: Screen-space barycentric coordinates for a point +// z0, z1, z2: Camera-space z-coordinates of the triangle vertices +// +// Returns +// World-space barycentric coordinates +// +__device__ inline float3 BarycentricPerspectiveCorrectionForward( + const float3& bary, + const float z0, + const float z1, + const float z2) { + const float w0_top = bary.x * z1 * z2; + const float w1_top = z0 * bary.y * z2; + const float w2_top = z0 * z1 * bary.z; + const float denom = fmaxf(w0_top + w1_top + w2_top, kEpsilon); + const float w0 = w0_top / denom; + const float w1 = w1_top / denom; + const float w2 = w2_top / denom; + return make_float3(w0, w1, w2); +} + +// Backward pass for applying perspective correction to barycentric coordinates. 
+// +// Args: +// bary: Screen-space barycentric coordinates for a point +// z0, z1, z2: Camera-space z-coordinates of the triangle vertices +// grad_out: Upstream gradient of the loss with respect to the corrected +// barycentric coordinates. +// +// Returns a tuple of: +// grad_bary: Downstream gradient of the loss with respect to the the +// uncorrected barycentric coordinates. +// grad_z0, grad_z1, grad_z2: Downstream gradient of the loss with respect +// to the z-coordinates of the triangle verts +__device__ inline thrust::tuple +BarycentricPerspectiveCorrectionBackward( + const float3& bary, + const float z0, + const float z1, + const float z2, + const float3& grad_out) { + // Recompute forward pass + const float w0_top = bary.x * z1 * z2; + const float w1_top = z0 * bary.y * z2; + const float w2_top = z0 * z1 * bary.z; + const float denom = fmaxf(w0_top + w1_top + w2_top, kEpsilon); + + // Now do backward pass + const float grad_denom_top = + -w0_top * grad_out.x - w1_top * grad_out.y - w2_top * grad_out.z; + const float grad_denom = grad_denom_top / (denom * denom); + const float grad_w0_top = grad_denom + grad_out.x / denom; + const float grad_w1_top = grad_denom + grad_out.y / denom; + const float grad_w2_top = grad_denom + grad_out.z / denom; + const float grad_bary_x = grad_w0_top * z1 * z2; + const float grad_bary_y = grad_w1_top * z0 * z2; + const float grad_bary_z = grad_w2_top * z0 * z1; + const float3 grad_bary = make_float3(grad_bary_x, grad_bary_y, grad_bary_z); + const float grad_z0 = grad_w1_top * bary.y * z2 + grad_w2_top * bary.z * z1; + const float grad_z1 = grad_w0_top * bary.x * z2 + grad_w2_top * bary.z * z0; + const float grad_z2 = grad_w0_top * bary.x * z1 + grad_w1_top * bary.y * z0; + return thrust::make_tuple(grad_bary, grad_z0, grad_z1, grad_z2); +} + +// Clip negative barycentric coordinates to 0.0 and renormalize so +// the barycentric coordinates for a point sum to 1. 
When the blur_radius +// is greater than 0, a face will still be recorded as overlapping a pixel +// if the pixel is outside the face. In this case at least one of the +// barycentric coordinates for the pixel relative to the face will be negative. +// Clipping will ensure that the texture and z buffer are interpolated +// correctly. +// +// Args +// bary: (w0, w1, w2) barycentric coordinates which can be outside the +// range [0, 1]. +// +// Returns +// bary: (w0, w1, w2) barycentric coordinates in the range [0, 1] which +// satisfy the condition: sum(w0, w1, w2) = 1.0. +// +__device__ inline float3 BarycentricClipForward(const float3 bary) { + float3 w = make_float3(0.0f, 0.0f, 0.0f); + // Clamp lower bound only + w.x = max(bary.x, 0.0); + w.y = max(bary.y, 0.0); + w.z = max(bary.z, 0.0); + float w_sum = w.x + w.y + w.z; + w_sum = fmaxf(w_sum, 1e-5); + w.x /= w_sum; + w.y /= w_sum; + w.z /= w_sum; + + return w; +} + +// Backward pass for barycentric coordinate clipping. +// +// Args +// bary: (w0, w1, w2) barycentric coordinates which can be outside the +// range [0, 1]. +// grad_baryclip_upstream: vec3 Upstream gradient for each of the clipped +// barycentric coordinates [grad_w0, grad_w1, grad_w2]. +// +// Returns +// vec3 of gradients for the unclipped barycentric coordinates: +// (grad_w0, grad_w1, grad_w2) +// +__device__ inline float3 BarycentricClipBackward( + const float3 bary, + const float3 grad_baryclip_upstream) { + // Redo some of the forward pass calculations + float3 w = make_float3(0.0f, 0.0f, 0.0f); + // Clamp lower bound only + w.x = max(bary.x, 0.0); + w.y = max(bary.y, 0.0); + w.z = max(bary.z, 0.0); + float w_sum = w.x + w.y + w.z; + + float3 grad_bary = make_float3(1.0f, 1.0f, 1.0f); + float3 grad_clip = make_float3(1.0f, 1.0f, 1.0f); + float3 grad_sum = make_float3(1.0f, 1.0f, 1.0f); + + // Check if sum was clipped. 
+ float grad_sum_clip = 1.0f; + if (w_sum < 1e-5) { + grad_sum_clip = 0.0f; + w_sum = 1e-5; + } + + // Check if any of bary values have been clipped. + if (bary.x < 0.0f) { + grad_clip.x = 0.0f; + } + if (bary.y < 0.0f) { + grad_clip.y = 0.0f; + } + if (bary.z < 0.0f) { + grad_clip.z = 0.0f; + } + + // Gradients of the sum. + grad_sum.x = -w.x / (pow(w_sum, 2.0f)) * grad_sum_clip; + grad_sum.y = -w.y / (pow(w_sum, 2.0f)) * grad_sum_clip; + grad_sum.z = -w.z / (pow(w_sum, 2.0f)) * grad_sum_clip; + + // Gradients for each of the bary coordinates including the cross terms + // from the sum. + grad_bary.x = grad_clip.x * + (grad_baryclip_upstream.x * (1.0f / w_sum + grad_sum.x) + + grad_baryclip_upstream.y * (grad_sum.y) + + grad_baryclip_upstream.z * (grad_sum.z)); + + grad_bary.y = grad_clip.y * + (grad_baryclip_upstream.y * (1.0f / w_sum + grad_sum.y) + + grad_baryclip_upstream.x * (grad_sum.x) + + grad_baryclip_upstream.z * (grad_sum.z)); + + grad_bary.z = grad_clip.z * + (grad_baryclip_upstream.z * (1.0f / w_sum + grad_sum.z) + + grad_baryclip_upstream.x * (grad_sum.x) + + grad_baryclip_upstream.y * (grad_sum.y)); + + return grad_bary; +} + +// Return minimum distance between line segment (v1 - v0) and point p. +// +// Args: +// p: Coordinates of a point. +// v0, v1: Coordinates of the end points of the line segment. +// +// Returns: +// squared distance to the boundary of the triangle. +// +__device__ inline float +PointLineDistanceForward(const float2& p, const float2& a, const float2& b) { + const float2 ba = b - a; + float l2 = dot(ba, ba); + float t = dot(ba, p - a) / l2; + if (l2 <= kEpsilon) { + return dot(p - b, p - b); + } + t = __saturatef(t); // clamp to the interval [+0.0, 1.0] + const float2 p_proj = a + t * ba; + const float2 d = (p_proj - p); + return dot(d, d); // squared distance +} + +// Backward pass for point to line distance in 2D. +// +// Args: +// p: Coordinates of a point. +// v0, v1: Coordinates of the end points of the line segment. 
+// grad_dist: Upstream gradient for the distance. +// +// Returns: +// tuple of gradients for each of the input points: +// (float2 grad_p, float2 grad_v0, float2 grad_v1) +// +__device__ inline thrust::tuple +PointLineDistanceBackward( + const float2& p, + const float2& v0, + const float2& v1, + const float& grad_dist) { + // Redo some of the forward pass calculations. + const float2 v1v0 = v1 - v0; + const float2 pv0 = p - v0; + const float t_bot = dot(v1v0, v1v0); + const float t_top = dot(v1v0, pv0); + float tt = t_top / t_bot; + tt = __saturatef(tt); + const float2 p_proj = (1.0f - tt) * v0 + tt * v1; + const float2 d = p - p_proj; + const float dist = sqrt(dot(d, d)); + + const float2 grad_p = -1.0f * grad_dist * 2.0f * (p_proj - p); + const float2 grad_v0 = grad_dist * (1.0f - tt) * 2.0f * (p_proj - p); + const float2 grad_v1 = grad_dist * tt * 2.0f * (p_proj - p); + + return thrust::make_tuple(grad_p, grad_v0, grad_v1); +} + +// The forward pass for calculating the shortest distance between a point +// and a triangle. +// +// Args: +// p: Coordinates of a point. +// v0, v1, v2: Coordinates of the three triangle vertices. +// +// Returns: +// shortest squared distance from a point to a triangle. +// +__device__ inline float PointTriangleDistanceForward( + const float2& p, + const float2& v0, + const float2& v1, + const float2& v2) { + // Compute distance to all 3 edges of the triangle and return the min. + const float e01_dist = PointLineDistanceForward(p, v0, v1); + const float e02_dist = PointLineDistanceForward(p, v0, v2); + const float e12_dist = PointLineDistanceForward(p, v1, v2); + const float edge_dist = fminf(fminf(e01_dist, e02_dist), e12_dist); + return edge_dist; +} + +// Backward pass for point triangle distance. +// +// Args: +// p: Coordinates of a point. +// v0, v1, v2: Coordinates of the three triangle vertices. +// grad_dist: Upstream gradient for the distance. 
+// +// Returns: +// tuple of gradients for each of the triangle vertices: +// (float2 grad_v0, float2 grad_v1, float2 grad_v2) +// +__device__ inline thrust::tuple +PointTriangleDistanceBackward( + const float2& p, + const float2& v0, + const float2& v1, + const float2& v2, + const float& grad_dist) { + // Compute distance to all 3 edges of the triangle. + const float e01_dist = PointLineDistanceForward(p, v0, v1); + const float e02_dist = PointLineDistanceForward(p, v0, v2); + const float e12_dist = PointLineDistanceForward(p, v1, v2); + + // Initialize output tensors. + float2 grad_v0 = make_float2(0.0f, 0.0f); + float2 grad_v1 = make_float2(0.0f, 0.0f); + float2 grad_v2 = make_float2(0.0f, 0.0f); + float2 grad_p = make_float2(0.0f, 0.0f); + + // Find which edge is the closest and return PointLineDistanceBackward for + // that edge. + if (e01_dist <= e02_dist && e01_dist <= e12_dist) { + // Closest edge is v1 - v0. + auto grad_e01 = PointLineDistanceBackward(p, v0, v1, grad_dist); + grad_p = thrust::get<0>(grad_e01); + grad_v0 = thrust::get<1>(grad_e01); + grad_v1 = thrust::get<2>(grad_e01); + } else if (e02_dist <= e01_dist && e02_dist <= e12_dist) { + // Closest edge is v2 - v0. + auto grad_e02 = PointLineDistanceBackward(p, v0, v2, grad_dist); + grad_p = thrust::get<0>(grad_e02); + grad_v0 = thrust::get<1>(grad_e02); + grad_v2 = thrust::get<2>(grad_e02); + } else if (e12_dist <= e01_dist && e12_dist <= e02_dist) { + // Closest edge is v2 - v1. + auto grad_e12 = PointLineDistanceBackward(p, v1, v2, grad_dist); + grad_p = thrust::get<0>(grad_e12); + grad_v1 = thrust::get<1>(grad_e12); + grad_v2 = thrust::get<2>(grad_e12); + } + + return thrust::make_tuple(grad_p, grad_v0, grad_v1, grad_v2); +} + +// ************************************************************* // +// vec3 utils // +// ************************************************************* // + +// Computes the area of a triangle (v0, v1, v2). 
+// +// Args: +// v0, v1, v2: vec3 coordinates of the triangle vertices +// +// Returns +// area: float: The area of the triangle +// +__device__ inline float +AreaOfTriangle(const float3& v0, const float3& v1, const float3& v2) { + float3 p0 = v1 - v0; + float3 p1 = v2 - v0; + + // compute the hypotenus of the scross product (p0 x p1) + float dd = hypot( + p0.y * p1.z - p0.z * p1.y, + hypot(p0.z * p1.x - p0.x * p1.z, p0.x * p1.y - p0.y * p1.x)); + + return dd / 2.0; +} + +// Computes the barycentric coordinates of a point p relative +// to a triangle (v0, v1, v2), i.e. p = w0 * v0 + w1 * v1 + w2 * v2 +// s.t. w0 + w1 + w2 = 1.0 +// +// NOTE that this function assumes that p lives on the space spanned +// by (v0, v1, v2). +// TODO(gkioxari) explicitly check whether p is coplanar with (v0, v1, v2) +// and throw an error if check fails +// +// Args: +// p: vec3 coordinates of a point +// v0, v1, v2: vec3 coordinates of the triangle vertices +// +// Returns +// bary: (w0, w1, w2) barycentric coordinates +// +__device__ inline float3 BarycentricCoords3Forward( + const float3& p, + const float3& v0, + const float3& v1, + const float3& v2) { + float3 p0 = v1 - v0; + float3 p1 = v2 - v0; + float3 p2 = p - v0; + + const float d00 = dot(p0, p0); + const float d01 = dot(p0, p1); + const float d11 = dot(p1, p1); + const float d20 = dot(p2, p0); + const float d21 = dot(p2, p1); + + const float denom = d00 * d11 - d01 * d01 + kEpsilon; + const float w1 = (d11 * d20 - d01 * d21) / denom; + const float w2 = (d00 * d21 - d01 * d20) / denom; + const float w0 = 1.0f - w1 - w2; + + return make_float3(w0, w1, w2); +} + +// Checks whether the point p is inside the triangle (v0, v1, v2). +// A point is inside the triangle, if all barycentric coordinates +// wrt the triangle are >= 0 & <= 1. +// If the triangle is degenerate, aka line or point, then return False. +// +// NOTE that this function assumes that p lives on the space spanned +// by (v0, v1, v2). 
+// TODO(gkioxari) explicitly check whether p is coplanar with (v0, v1, v2) +// and throw an error if check fails +// +// Args: +// p: vec3 coordinates of a point +// v0, v1, v2: vec3 coordinates of the triangle vertices +// min_triangle_area: triangles less than this size are considered +// points/lines, IsInsideTriangle returns False +// +// Returns: +// inside: bool indicating wether p is inside triangle +// +__device__ inline bool IsInsideTriangle( + const float3& p, + const float3& v0, + const float3& v1, + const float3& v2, + const double min_triangle_area) { + bool inside; + if (AreaOfTriangle(v0, v1, v2) < min_triangle_area) { + inside = 0; + } else { + float3 bary = BarycentricCoords3Forward(p, v0, v1, v2); + bool x_in = 0.0f <= bary.x && bary.x <= 1.0f; + bool y_in = 0.0f <= bary.y && bary.y <= 1.0f; + bool z_in = 0.0f <= bary.z && bary.z <= 1.0f; + inside = x_in && y_in && z_in; + } + return inside; +} + +// Computes the minimum squared Euclidean distance between the point p +// and the segment spanned by (v0, v1). +// To find this we parametrize p as: x(t) = v0 + t * (v1 - v0) +// and find t which minimizes (x(t) - p) ^ 2. +// Note that p does not need to live in the space spanned by (v0, v1) +// +// Args: +// p: vec3 coordinates of a point +// v0, v1: vec3 coordinates of start and end of segment +// +// Returns: +// dist: the minimum squared distance of p from segment (v0, v1) +// + +__device__ inline float +PointLine3DistanceForward(const float3& p, const float3& v0, const float3& v1) { + const float3 v1v0 = v1 - v0; + const float3 pv0 = p - v0; + const float t_bot = dot(v1v0, v1v0); + const float t_top = dot(pv0, v1v0); + // if t_bot small, then v0 == v1, set tt to 0. + float tt = (t_bot < kEpsilon) ? 
0.0f : (t_top / t_bot); + + tt = __saturatef(tt); // clamps to [0, 1] + + const float3 p_proj = v0 + tt * v1v0; + const float3 diff = p - p_proj; + const float dist = dot(diff, diff); + return dist; +} + +// Backward function of the minimum squared Euclidean distance between the point +// p and the line segment (v0, v1). +// +// Args: +// p: vec3 coordinates of a point +// v0, v1: vec3 coordinates of start and end of segment +// grad_dist: Float of the gradient wrt dist +// +// Returns: +// tuple of gradients for the point and line segment (v0, v1): +// (float3 grad_p, float3 grad_v0, float3 grad_v1) + +__device__ inline thrust::tuple +PointLine3DistanceBackward( + const float3& p, + const float3& v0, + const float3& v1, + const float& grad_dist) { + const float3 v1v0 = v1 - v0; + const float3 pv0 = p - v0; + const float t_bot = dot(v1v0, v1v0); + const float t_top = dot(v1v0, pv0); + + float3 grad_p = make_float3(0.0f, 0.0f, 0.0f); + float3 grad_v0 = make_float3(0.0f, 0.0f, 0.0f); + float3 grad_v1 = make_float3(0.0f, 0.0f, 0.0f); + + const float tt = t_top / t_bot; + + if (t_bot < kEpsilon) { + // if t_bot small, then v0 == v1, + // and dist = 0.5 * dot(pv0, pv0) + 0.5 * dot(pv1, pv1) + grad_p = grad_dist * 2.0f * pv0; + grad_v0 = -0.5f * grad_p; + grad_v1 = grad_v0; + } else if (tt < 0.0f) { + grad_p = grad_dist * 2.0f * pv0; + grad_v0 = -1.0f * grad_p; + // no gradients wrt v1 + } else if (tt > 1.0f) { + grad_p = grad_dist * 2.0f * (p - v1); + grad_v1 = -1.0f * grad_p; + // no gradients wrt v0 + } else { + const float3 p_proj = v0 + tt * v1v0; + const float3 diff = p - p_proj; + const float3 grad_base = grad_dist * 2.0f * diff; + grad_p = grad_base - dot(grad_base, v1v0) * v1v0 / t_bot; + const float3 dtt_v0 = (-1.0f * v1v0 - pv0 + 2.0f * tt * v1v0) / t_bot; + grad_v0 = (-1.0f + tt) * grad_base - dot(grad_base, v1v0) * dtt_v0; + const float3 dtt_v1 = (pv0 - 2.0f * tt * v1v0) / t_bot; + grad_v1 = -dot(grad_base, v1v0) * dtt_v1 - tt * grad_base; + } + + return 
thrust::make_tuple(grad_p, grad_v0, grad_v1); +} + +// Computes the squared distance of a point p relative to a triangle (v0, v1, +// v2). If the point's projection p0 on the plane spanned by (v0, v1, v2) is +// inside the triangle with vertices (v0, v1, v2), then the returned value is +// the squared distance of p to its projection p0. Otherwise, the returned value +// is the smallest squared distance of p from the line segments (v0, v1), (v0, +// v2) and (v1, v2). +// +// Args: +// p: vec3 coordinates of a point +// v0, v1, v2: vec3 coordinates of the triangle vertices +// min_triangle_area: triangles less than this size are considered +// points/lines, IsInsideTriangle returns False +// +// Returns: +// dist: Float of the squared distance +// + +__device__ inline float PointTriangle3DistanceForward( + const float3& p, + const float3& v0, + const float3& v1, + const float3& v2, + const double min_triangle_area) { + float3 normal = cross(v2 - v0, v1 - v0); + const float norm_normal = norm(normal); + normal = normalize(normal); + + // p0 is the projection of p on the plane spanned by (v0, v1, v2) + // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal + const float t = dot(v0 - p, normal); + const float3 p0 = p + t * normal; + + bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area); + float dist = 0.0f; + + if ((is_inside) && (norm_normal > kEpsilon)) { + // if projection p0 is inside triangle spanned by (v0, v1, v2) + // then distance is equal to norm(p0 - p)^2 + dist = t * t; + } else { + const float e01 = PointLine3DistanceForward(p, v0, v1); + const float e02 = PointLine3DistanceForward(p, v0, v2); + const float e12 = PointLine3DistanceForward(p, v1, v2); + + dist = (e01 > e02) ? e02 : e01; + dist = (dist > e12) ? e12 : dist; + } + + return dist; +} + +// The backward pass for computing the squared distance of a point +// to the triangle (v0, v1, v2). 
+// +// Args: +// p: xyz coordinates of a point +// v0, v1, v2: xyz coordinates of the triangle vertices +// grad_dist: Float of the gradient wrt dist +// min_triangle_area: triangles less than this size are considered +// points/lines, IsInsideTriangle returns False +// +// Returns: +// tuple of gradients for the point and triangle: +// (float3 grad_p, float3 grad_v0, float3 grad_v1, float3 grad_v2) +// + +__device__ inline thrust::tuple +PointTriangle3DistanceBackward( + const float3& p, + const float3& v0, + const float3& v1, + const float3& v2, + const float& grad_dist, + const double min_triangle_area) { + const float3 v2v0 = v2 - v0; + const float3 v1v0 = v1 - v0; + const float3 v0p = v0 - p; + float3 raw_normal = cross(v2v0, v1v0); + const float norm_normal = norm(raw_normal); + float3 normal = normalize(raw_normal); + + // p0 is the projection of p on the plane spanned by (v0, v1, v2) + // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal + const float t = dot(v0 - p, normal); + const float3 p0 = p + t * normal; + const float3 diff = t * normal; + + bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area); + + float3 grad_p = make_float3(0.0f, 0.0f, 0.0f); + float3 grad_v0 = make_float3(0.0f, 0.0f, 0.0f); + float3 grad_v1 = make_float3(0.0f, 0.0f, 0.0f); + float3 grad_v2 = make_float3(0.0f, 0.0f, 0.0f); + + if ((is_inside) && (norm_normal > kEpsilon)) { + // derivative of dist wrt p + grad_p = -2.0f * grad_dist * t * normal; + // derivative of dist wrt normal + const float3 grad_normal = 2.0f * grad_dist * t * (v0p + diff); + // derivative of dist wrt raw_normal + const float3 grad_raw_normal = normalize_backward(raw_normal, grad_normal); + // derivative of dist wrt v2v0 and v1v0 + const auto grad_cross = cross_backward(v2v0, v1v0, grad_raw_normal); + const float3 grad_cross_v2v0 = thrust::get<0>(grad_cross); + const float3 grad_cross_v1v0 = thrust::get<1>(grad_cross); + grad_v0 = + grad_dist * 2.0f * t * normal - (grad_cross_v2v0 
+ grad_cross_v1v0); + grad_v1 = grad_cross_v1v0; + grad_v2 = grad_cross_v2v0; + } else { + const float e01 = PointLine3DistanceForward(p, v0, v1); + const float e02 = PointLine3DistanceForward(p, v0, v2); + const float e12 = PointLine3DistanceForward(p, v1, v2); + + if ((e01 <= e02) && (e01 <= e12)) { + // e01 is smallest + const auto grads = PointLine3DistanceBackward(p, v0, v1, grad_dist); + grad_p = thrust::get<0>(grads); + grad_v0 = thrust::get<1>(grads); + grad_v1 = thrust::get<2>(grads); + } else if ((e02 <= e01) && (e02 <= e12)) { + // e02 is smallest + const auto grads = PointLine3DistanceBackward(p, v0, v2, grad_dist); + grad_p = thrust::get<0>(grads); + grad_v0 = thrust::get<1>(grads); + grad_v2 = thrust::get<2>(grads); + } else if ((e12 <= e01) && (e12 <= e02)) { + // e12 is smallest + const auto grads = PointLine3DistanceBackward(p, v1, v2, grad_dist); + grad_p = thrust::get<0>(grads); + grad_v1 = thrust::get<1>(grads); + grad_v2 = thrust::get<2>(grads); + } + } + + return thrust::make_tuple(grad_p, grad_v0, grad_v1, grad_v2); +} diff --git a/pytorch3d_simplified/pytorch3d/csrc/utils/geometry_utils.h b/pytorch3d_simplified/pytorch3d/csrc/utils/geometry_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ad9f7ff3f34dde7b119ea708eb0901cb826794d7 --- /dev/null +++ b/pytorch3d_simplified/pytorch3d/csrc/utils/geometry_utils.h @@ -0,0 +1,823 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include "vec2.h" +#include "vec3.h" + +// Set epsilon for preventing floating point errors and division by 0. +const auto kEpsilon = 1e-8; + +// Determines whether a point p is on the right side of a 2D line segment +// given by the end points v0, v1. +// +// Args: +// p: vec2 Coordinates of a point. 
+// v0, v1: vec2 Coordinates of the end points of the edge. +// +// Returns: +// area: The signed area of the parallelogram given by the vectors +// A = p - v0 +// B = v1 - v0 +// +// v1 ________ +// /\ / +// A / \ / +// / \ / +// v0 /______\/ +// B p +// +// The area can also be interpreted as the cross product A x B. +// If the sign of the area is positive, the point p is on the +// right side of the edge. Negative area indicates the point is on +// the left side of the edge. i.e. for an edge v1 - v0: +// +// v1 +// / +// / +// - / + +// / +// / +// v0 +// +template +T EdgeFunctionForward(const vec2& p, const vec2& v0, const vec2& v1) { + const T edge = (p.x - v0.x) * (v1.y - v0.y) - (p.y - v0.y) * (v1.x - v0.x); + return edge; +} + +// Backward pass for the edge function returning partial dervivatives for each +// of the input points. +// +// Args: +// p: vec2 Coordinates of a point. +// v0, v1: vec2 Coordinates of the end points of the edge. +// grad_edge: Upstream gradient for output from edge function. +// +// Returns: +// tuple of gradients for each of the input points: +// (vec2 d_edge_dp, vec2 d_edge_dv0, vec2 d_edge_dv1) +// +template +inline std::tuple, vec2, vec2> EdgeFunctionBackward( + const vec2& p, + const vec2& v0, + const vec2& v1, + const T grad_edge) { + const vec2 dedge_dp(v1.y - v0.y, v0.x - v1.x); + const vec2 dedge_dv0(p.y - v1.y, v1.x - p.x); + const vec2 dedge_dv1(v0.y - p.y, p.x - v0.x); + return std::make_tuple( + grad_edge * dedge_dp, grad_edge * dedge_dv0, grad_edge * dedge_dv1); +} + +// The forward pass for computing the barycentric coordinates of a point +// relative to a triangle. +// Ref: +// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/barycentric-coordinates +// +// Args: +// p: Coordinates of a point. +// v0, v1, v2: Coordinates of the triangle vertices. +// +// Returns +// bary: (w0, w1, w2) barycentric coordinates in the range [0, 1]. 
+// +template +vec3 BarycentricCoordinatesForward( + const vec2& p, + const vec2& v0, + const vec2& v1, + const vec2& v2) { + const T area = EdgeFunctionForward(v2, v0, v1) + kEpsilon; + const T w0 = EdgeFunctionForward(p, v1, v2) / area; + const T w1 = EdgeFunctionForward(p, v2, v0) / area; + const T w2 = EdgeFunctionForward(p, v0, v1) / area; + return vec3(w0, w1, w2); +} + +// The backward pass for computing the barycentric coordinates of a point +// relative to a triangle. +// +// Args: +// p: Coordinates of a point. +// v0, v1, v2: (x, y) coordinates of the triangle vertices. +// grad_bary_upstream: vec3 Upstream gradient for each of the +// barycentric coordaintes [grad_w0, grad_w1, grad_w2]. +// +// Returns +// tuple of gradients for each of the triangle vertices: +// (vec2 grad_v0, vec2 grad_v1, vec2 grad_v2) +// +template +inline std::tuple, vec2, vec2, vec2> BarycentricCoordsBackward( + const vec2& p, + const vec2& v0, + const vec2& v1, + const vec2& v2, + const vec3& grad_bary_upstream) { + const T area = EdgeFunctionForward(v2, v0, v1) + kEpsilon; + const T area2 = pow(area, 2.0f); + const T area_inv = 1.0f / area; + const T e0 = EdgeFunctionForward(p, v1, v2); + const T e1 = EdgeFunctionForward(p, v2, v0); + const T e2 = EdgeFunctionForward(p, v0, v1); + + const T grad_w0 = grad_bary_upstream.x; + const T grad_w1 = grad_bary_upstream.y; + const T grad_w2 = grad_bary_upstream.z; + + // Calculate component of the gradient from each of w0, w1 and w2. + // e.g. 
for w0: + // dloss/dw0_v = dl/dw0 * dw0/dw0_top * dw0_top/dv + // + dl/dw0 * dw0/dw0_bot * dw0_bot/dv + const T dw0_darea = -e0 / (area2); + const T dw0_e0 = area_inv; + const T dloss_d_w0area = grad_w0 * dw0_darea; + const T dloss_e0 = grad_w0 * dw0_e0; + auto de0_dv = EdgeFunctionBackward(p, v1, v2, dloss_e0); + auto dw0area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w0area); + const vec2 dw0_p = std::get<0>(de0_dv); + const vec2 dw0_dv0 = std::get<1>(dw0area_dv); + const vec2 dw0_dv1 = std::get<1>(de0_dv) + std::get<2>(dw0area_dv); + const vec2 dw0_dv2 = std::get<2>(de0_dv) + std::get<0>(dw0area_dv); + + const T dw1_darea = -e1 / (area2); + const T dw1_e1 = area_inv; + const T dloss_d_w1area = grad_w1 * dw1_darea; + const T dloss_e1 = grad_w1 * dw1_e1; + auto de1_dv = EdgeFunctionBackward(p, v2, v0, dloss_e1); + auto dw1area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w1area); + const vec2 dw1_p = std::get<0>(de1_dv); + const vec2 dw1_dv0 = std::get<2>(de1_dv) + std::get<1>(dw1area_dv); + const vec2 dw1_dv1 = std::get<2>(dw1area_dv); + const vec2 dw1_dv2 = std::get<1>(de1_dv) + std::get<0>(dw1area_dv); + + const T dw2_darea = -e2 / (area2); + const T dw2_e2 = area_inv; + const T dloss_d_w2area = grad_w2 * dw2_darea; + const T dloss_e2 = grad_w2 * dw2_e2; + auto de2_dv = EdgeFunctionBackward(p, v0, v1, dloss_e2); + auto dw2area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w2area); + const vec2 dw2_p = std::get<0>(de2_dv); + const vec2 dw2_dv0 = std::get<1>(de2_dv) + std::get<1>(dw2area_dv); + const vec2 dw2_dv1 = std::get<2>(de2_dv) + std::get<2>(dw2area_dv); + const vec2 dw2_dv2 = std::get<0>(dw2area_dv); + + const vec2 dbary_p = dw0_p + dw1_p + dw2_p; + const vec2 dbary_dv0 = dw0_dv0 + dw1_dv0 + dw2_dv0; + const vec2 dbary_dv1 = dw0_dv1 + dw1_dv1 + dw2_dv1; + const vec2 dbary_dv2 = dw0_dv2 + dw1_dv2 + dw2_dv2; + + return std::make_tuple(dbary_p, dbary_dv0, dbary_dv1, dbary_dv2); +} + +// Forward pass for applying perspective correction to barycentric 
coordinates. +// +// Args: +// bary: Screen-space barycentric coordinates for a point +// z0, z1, z2: Camera-space z-coordinates of the triangle vertices +// +// Returns +// World-space barycentric coordinates +// +template +inline vec3 BarycentricPerspectiveCorrectionForward( + const vec3& bary, + const T z0, + const T z1, + const T z2) { + const T w0_top = bary.x * z1 * z2; + const T w1_top = bary.y * z0 * z2; + const T w2_top = bary.z * z0 * z1; + const T denom = std::max(w0_top + w1_top + w2_top, kEpsilon); + const T w0 = w0_top / denom; + const T w1 = w1_top / denom; + const T w2 = w2_top / denom; + return vec3(w0, w1, w2); +} + +// Backward pass for applying perspective correction to barycentric coordinates. +// +// Args: +// bary: Screen-space barycentric coordinates for a point +// z0, z1, z2: Camera-space z-coordinates of the triangle vertices +// grad_out: Upstream gradient of the loss with respect to the corrected +// barycentric coordinates. +// +// Returns a tuple of: +// grad_bary: Downstream gradient of the loss with respect to the the +// uncorrected barycentric coordinates. 
+// grad_z0, grad_z1, grad_z2: Downstream gradient of the loss with respect +// to the z-coordinates of the triangle verts +template +inline std::tuple, T, T, T> BarycentricPerspectiveCorrectionBackward( + const vec3& bary, + const T z0, + const T z1, + const T z2, + const vec3& grad_out) { + // Recompute forward pass + const T w0_top = bary.x * z1 * z2; + const T w1_top = bary.y * z0 * z2; + const T w2_top = bary.z * z0 * z1; + const T denom = std::max(w0_top + w1_top + w2_top, kEpsilon); + + // Now do backward pass + const T grad_denom_top = + -w0_top * grad_out.x - w1_top * grad_out.y - w2_top * grad_out.z; + const T grad_denom = grad_denom_top / (denom * denom); + const T grad_w0_top = grad_denom + grad_out.x / denom; + const T grad_w1_top = grad_denom + grad_out.y / denom; + const T grad_w2_top = grad_denom + grad_out.z / denom; + const T grad_bary_x = grad_w0_top * z1 * z2; + const T grad_bary_y = grad_w1_top * z0 * z2; + const T grad_bary_z = grad_w2_top * z0 * z1; + const vec3 grad_bary(grad_bary_x, grad_bary_y, grad_bary_z); + const T grad_z0 = grad_w1_top * bary.y * z2 + grad_w2_top * bary.z * z1; + const T grad_z1 = grad_w0_top * bary.x * z2 + grad_w2_top * bary.z * z0; + const T grad_z2 = grad_w0_top * bary.x * z1 + grad_w1_top * bary.y * z0; + return std::make_tuple(grad_bary, grad_z0, grad_z1, grad_z2); +} + +// Clip negative barycentric coordinates to 0.0 and renormalize so +// the barycentric coordinates for a point sum to 1. When the blur_radius +// is greater than 0, a face will still be recorded as overlapping a pixel +// if the pixel is outside the face. In this case at least one of the +// barycentric coordinates for the pixel relative to the face will be negative. +// Clipping will ensure that the texture and z buffer are interpolated +// correctly. +// +// Args +// bary: (w0, w1, w2) barycentric coordinates which can contain values < 0. 
+// +// Returns +// bary: (w0, w1, w2) barycentric coordinates in the range [0, 1] which +// satisfy the condition: sum(w0, w1, w2) = 1.0. +// +template +vec3 BarycentricClipForward(const vec3 bary) { + vec3 w(0.0f, 0.0f, 0.0f); + // Only clamp negative values to 0.0. + // No need to clamp values > 1.0 as they will be renormalized. + w.x = std::max(bary.x, 0.0f); + w.y = std::max(bary.y, 0.0f); + w.z = std::max(bary.z, 0.0f); + float w_sum = w.x + w.y + w.z; + w_sum = std::fmaxf(w_sum, 1e-5); + w.x /= w_sum; + w.y /= w_sum; + w.z /= w_sum; + return w; +} + +// Backward pass for barycentric coordinate clipping. +// +// Args +// bary: (w0, w1, w2) barycentric coordinates which can contain values < 0. +// grad_baryclip_upstream: vec3 Upstream gradient for each of the clipped +// barycentric coordinates [grad_w0, grad_w1, grad_w2]. +// +// Returns +// vec3 of gradients for the unclipped barycentric coordinates: +// (grad_w0, grad_w1, grad_w2) +// +template +vec3 BarycentricClipBackward( + const vec3 bary, + const vec3 grad_baryclip_upstream) { + // Redo some of the forward pass calculations + vec3 w(0.0f, 0.0f, 0.0f); + w.x = std::max(bary.x, 0.0f); + w.y = std::max(bary.y, 0.0f); + w.z = std::max(bary.z, 0.0f); + float w_sum = w.x + w.y + w.z; + + vec3 grad_bary(1.0f, 1.0f, 1.0f); + vec3 grad_clip(1.0f, 1.0f, 1.0f); + vec3 grad_sum(1.0f, 1.0f, 1.0f); + + // Check if the sum was clipped. + float grad_sum_clip = 1.0f; + if (w_sum < 1e-5) { + grad_sum_clip = 0.0f; + w_sum = 1e-5; + } + + // Check if any of the bary coordinates have been clipped. + // Only negative values are clamped to 0.0. + if (bary.x < 0.0f) { + grad_clip.x = 0.0f; + } + if (bary.y < 0.0f) { + grad_clip.y = 0.0f; + } + if (bary.z < 0.0f) { + grad_clip.z = 0.0f; + } + + // Gradients of the sum. 
+ grad_sum.x = -w.x / (pow(w_sum, 2.0f)) * grad_sum_clip; + grad_sum.y = -w.y / (pow(w_sum, 2.0f)) * grad_sum_clip; + grad_sum.z = -w.z / (pow(w_sum, 2.0f)) * grad_sum_clip; + + // Gradients for each of the bary coordinates including the cross terms + // from the sum. + grad_bary.x = grad_clip.x * + (grad_baryclip_upstream.x * (1.0f / w_sum + grad_sum.x) + + grad_baryclip_upstream.y * (grad_sum.y) + + grad_baryclip_upstream.z * (grad_sum.z)); + + grad_bary.y = grad_clip.y * + (grad_baryclip_upstream.y * (1.0f / w_sum + grad_sum.y) + + grad_baryclip_upstream.x * (grad_sum.x) + + grad_baryclip_upstream.z * (grad_sum.z)); + + grad_bary.z = grad_clip.z * + (grad_baryclip_upstream.z * (1.0f / w_sum + grad_sum.z) + + grad_baryclip_upstream.x * (grad_sum.x) + + grad_baryclip_upstream.y * (grad_sum.y)); + + return grad_bary; +} + +// Calculate minimum distance between a line segment (v1 - v0) and point p. +// +// Args: +// p: Coordinates of a point. +// v0, v1: Coordinates of the end points of the line segment. +// +// Returns: +// squared distance of the point to the line. +// +// Consider the line extending the segment - this can be parameterized as: +// v0 + t (v1 - v0). +// +// First find the projection of point p onto the line. It falls where: +// t = [(p - v0) . (v1 - v0)] / |v1 - v0|^2 +// where . is the dot product. +// +// The parameter t is clamped from [0, 1] to handle points outside the +// segment (v1 - v0). +// +// Once the projection of the point on the segment is known, the distance from +// p to the projection gives the minimum distance to the segment. 
+// +template +T PointLineDistanceForward( + const vec2& p, + const vec2& v0, + const vec2& v1) { + const vec2 v1v0 = v1 - v0; + const T l2 = dot(v1v0, v1v0); + if (l2 <= kEpsilon) { + return dot(p - v1, p - v1); + } + + const T t = dot(v1v0, p - v0) / l2; + const T tt = std::min(std::max(t, 0.00f), 1.00f); + const vec2 p_proj = v0 + tt * v1v0; + return dot(p - p_proj, p - p_proj); +} + +template +T PointLine3DistanceForward( + const vec3& p, + const vec3& v0, + const vec3& v1) { + const vec3 v1v0 = v1 - v0; + const T l2 = dot(v1v0, v1v0); + if (l2 <= kEpsilon) { + return dot(p - v1, p - v1); + } + + const T t = dot(v1v0, p - v0) / l2; + const T tt = std::min(std::max(t, 0.00f), 1.00f); + const vec3 p_proj = v0 + tt * v1v0; + return dot(p - p_proj, p - p_proj); +} + +// Backward pass for point to line distance in 2D. +// +// Args: +// p: Coordinates of a point. +// v0, v1: Coordinates of the end points of the line segment. +// grad_dist: Upstream gradient for the distance. +// +// Returns: +// tuple of gradients for each of the input points: +// (vec2 grad_p, vec2 grad_v0, vec2 grad_v1) +// +template +inline std::tuple, vec2, vec2> PointLineDistanceBackward( + const vec2& p, + const vec2& v0, + const vec2& v1, + const T& grad_dist) { + // Redo some of the forward pass calculations. 
+ const vec2 v1v0 = v1 - v0; + const vec2 pv0 = p - v0; + const T t_bot = dot(v1v0, v1v0); + const T t_top = dot(v1v0, pv0); + const T t = t_top / t_bot; + const T tt = std::min(std::max(t, 0.00f), 1.00f); + const vec2 p_proj = (1.0f - tt) * v0 + tt * v1; + + const vec2 grad_v0 = grad_dist * (1.0f - tt) * 2.0f * (p_proj - p); + const vec2 grad_v1 = grad_dist * tt * 2.0f * (p_proj - p); + const vec2 grad_p = -1.0f * grad_dist * 2.0f * (p_proj - p); + + return std::make_tuple(grad_p, grad_v0, grad_v1); +} + +template +std::tuple, vec3, vec3> PointLine3DistanceBackward( + const vec3& p, + const vec3& v0, + const vec3& v1, + const T& grad_dist) { + const vec3 v1v0 = v1 - v0; + const vec3 pv0 = p - v0; + const T t_bot = dot(v1v0, v1v0); + const T t_top = dot(v1v0, pv0); + + vec3 grad_p{0.0f, 0.0f, 0.0f}; + vec3 grad_v0{0.0f, 0.0f, 0.0f}; + vec3 grad_v1{0.0f, 0.0f, 0.0f}; + + const T tt = t_top / t_bot; + + if (t_bot < kEpsilon) { + // if t_bot small, then v0 == v1, + // and dist = 0.5 * dot(pv0, pv0) + 0.5 * dot(pv1, pv1) + grad_p = grad_dist * 2.0f * pv0; + grad_v0 = -0.5f * grad_p; + grad_v1 = grad_v0; + } else if (tt < 0.0f) { + grad_p = grad_dist * 2.0f * pv0; + grad_v0 = -1.0f * grad_p; + // no gradients wrt v1 + } else if (tt > 1.0f) { + grad_p = grad_dist * 2.0f * (p - v1); + grad_v1 = -1.0f * grad_p; + // no gradients wrt v0 + } else { + const vec3 p_proj = v0 + tt * v1v0; + const vec3 diff = p - p_proj; + const vec3 grad_base = grad_dist * 2.0f * diff; + grad_p = grad_base - dot(grad_base, v1v0) * v1v0 / t_bot; + const vec3 dtt_v0 = (-1.0f * v1v0 - pv0 + 2.0f * tt * v1v0) / t_bot; + grad_v0 = (-1.0f + tt) * grad_base - dot(grad_base, v1v0) * dtt_v0; + const vec3 dtt_v1 = (pv0 - 2.0f * tt * v1v0) / t_bot; + grad_v1 = -dot(grad_base, v1v0) * dtt_v1 - tt * grad_base; + } + + return std::make_tuple(grad_p, grad_v0, grad_v1); +} + +// The forward pass for calculating the shortest distance between a point +// and a triangle. 
+// Ref: https://www.randygaul.net/2014/07/23/distance-point-to-line-segment/ +// +// Args: +// p: Coordinates of a point. +// v0, v1, v2: Coordinates of the three triangle vertices. +// +// Returns: +// shortest squared distance from a point to a triangle. +// +// +template +T PointTriangleDistanceForward( + const vec2& p, + const vec2& v0, + const vec2& v1, + const vec2& v2) { + // Compute distance of point to 3 edges of the triangle and return the + // minimum value. + const T e01_dist = PointLineDistanceForward(p, v0, v1); + const T e02_dist = PointLineDistanceForward(p, v0, v2); + const T e12_dist = PointLineDistanceForward(p, v1, v2); + const T edge_dist = std::min(std::min(e01_dist, e02_dist), e12_dist); + + return edge_dist; +} + +// Backward pass for point triangle distance. +// +// Args: +// p: Coordinates of a point. +// v0, v1, v2: Coordinates of the three triangle vertices. +// grad_dist: Upstream gradient for the distance. +// +// Returns: +// tuple of gradients for each of the triangle vertices: +// (vec2 grad_v0, vec2 grad_v1, vec2 grad_v2) +// +template +inline std::tuple, vec2, vec2, vec2> +PointTriangleDistanceBackward( + const vec2& p, + const vec2& v0, + const vec2& v1, + const vec2& v2, + const T& grad_dist) { + // Compute distance to all 3 edges of the triangle. + const T e01_dist = PointLineDistanceForward(p, v0, v1); + const T e02_dist = PointLineDistanceForward(p, v0, v2); + const T e12_dist = PointLineDistanceForward(p, v1, v2); + + // Initialize output tensors. + vec2 grad_v0(0.0f, 0.0f); + vec2 grad_v1(0.0f, 0.0f); + vec2 grad_v2(0.0f, 0.0f); + vec2 grad_p(0.0f, 0.0f); + + // Find which edge is the closest and return PointLineDistanceBackward for + // that edge. + if (e01_dist <= e02_dist && e01_dist <= e12_dist) { + // Closest edge is v1 - v0. 
+ auto grad_e01 = PointLineDistanceBackward(p, v0, v1, grad_dist); + grad_p = std::get<0>(grad_e01); + grad_v0 = std::get<1>(grad_e01); + grad_v1 = std::get<2>(grad_e01); + } else if (e02_dist <= e01_dist && e02_dist <= e12_dist) { + // Closest edge is v2 - v0. + auto grad_e02 = PointLineDistanceBackward(p, v0, v2, grad_dist); + grad_p = std::get<0>(grad_e02); + grad_v0 = std::get<1>(grad_e02); + grad_v2 = std::get<2>(grad_e02); + } else if (e12_dist <= e01_dist && e12_dist <= e02_dist) { + // Closest edge is v2 - v1. + auto grad_e12 = PointLineDistanceBackward(p, v1, v2, grad_dist); + grad_p = std::get<0>(grad_e12); + grad_v1 = std::get<1>(grad_e12); + grad_v2 = std::get<2>(grad_e12); + } + + return std::make_tuple(grad_p, grad_v0, grad_v1, grad_v2); +} + +// Computes the area of a triangle (v0, v1, v2). +// Args: +// v0, v1, v2: vec3 coordinates of the triangle vertices +// +// Returns: +// area: float: the area of the triangle +// +template +T AreaOfTriangle(const vec3& v0, const vec3& v1, const vec3& v2) { + vec3 p0 = v1 - v0; + vec3 p1 = v2 - v0; + + // compute the hypotenus of the scross product (p0 x p1) + float dd = std::hypot( + p0.y * p1.z - p0.z * p1.y, + std::hypot(p0.z * p1.x - p0.x * p1.z, p0.x * p1.y - p0.y * p1.x)); + + return dd / 2.0; +} + +// Computes the squared distance of a point p relative to a triangle (v0, v1, +// v2). If the point's projection p0 on the plane spanned by (v0, v1, v2) is +// inside the triangle with vertices (v0, v1, v2), then the returned value is +// the squared distance of p to its projection p0. Otherwise, the returned value +// is the smallest squared distance of p from the line segments (v0, v1), (v0, +// v2) and (v1, v2). 
+// +// Args: +// p: vec3 coordinates of a point +// v0, v1, v2: vec3 coordinates of the triangle vertices +// +// Returns: +// dist: Float of the squared distance +// + +const float vEpsilon = 1e-8; + +template +vec3 BarycentricCoords3Forward( + const vec3& p, + const vec3& v0, + const vec3& v1, + const vec3& v2) { + vec3 p0 = v1 - v0; + vec3 p1 = v2 - v0; + vec3 p2 = p - v0; + + const T d00 = dot(p0, p0); + const T d01 = dot(p0, p1); + const T d11 = dot(p1, p1); + const T d20 = dot(p2, p0); + const T d21 = dot(p2, p1); + + const T denom = d00 * d11 - d01 * d01 + kEpsilon; + const T w1 = (d11 * d20 - d01 * d21) / denom; + const T w2 = (d00 * d21 - d01 * d20) / denom; + const T w0 = 1.0f - w1 - w2; + + return vec3(w0, w1, w2); +} + +// Checks whether the point p is inside the triangle (v0, v1, v2). +// A point is inside the triangle, if all barycentric coordinates +// wrt the triangle are >= 0 & <= 1. +// If the triangle is degenerate, aka line or point, then return False. +// +// NOTE that this function assumes that p lives on the space spanned +// by (v0, v1, v2). 
+// TODO(gkioxari) explicitly check whether p is coplanar with (v0, v1, v2) +// and throw an error if check fails +// +// Args: +// p: vec3 coordinates of a point +// v0, v1, v2: vec3 coordinates of the triangle vertices +// min_triangle_area: triangles less than this size are considered +// points/lines, IsInsideTriangle returns False +// +// Returns: +// inside: bool indicating wether p is inside triangle +// +template +static bool IsInsideTriangle( + const vec3& p, + const vec3& v0, + const vec3& v1, + const vec3& v2, + const double min_triangle_area) { + bool inside; + if (AreaOfTriangle(v0, v1, v2) < min_triangle_area) { + inside = 0; + } else { + vec3 bary = BarycentricCoords3Forward(p, v0, v1, v2); + bool x_in = 0.0f <= bary.x && bary.x <= 1.0f; + bool y_in = 0.0f <= bary.y && bary.y <= 1.0f; + bool z_in = 0.0f <= bary.z && bary.z <= 1.0f; + inside = x_in && y_in && z_in; + } + return inside; +} + +template +T PointTriangle3DistanceForward( + const vec3& p, + const vec3& v0, + const vec3& v1, + const vec3& v2, + const double min_triangle_area) { + vec3 normal = cross(v2 - v0, v1 - v0); + const T norm_normal = norm(normal); + normal = normal / (norm_normal + vEpsilon); + + // p0 is the projection of p on the plane spanned by (v0, v1, v2) + // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal + const T t = dot(v0 - p, normal); + const vec3 p0 = p + t * normal; + + bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area); + T dist = 0.0f; + + if ((is_inside) && (norm_normal > kEpsilon)) { + // if projection p0 is inside triangle spanned by (v0, v1, v2) + // then distance is equal to norm(p0 - p)^2 + dist = t * t; + } else { + const float e01 = PointLine3DistanceForward(p, v0, v1); + const float e02 = PointLine3DistanceForward(p, v0, v2); + const float e12 = PointLine3DistanceForward(p, v1, v2); + + dist = (e01 > e02) ? e02 : e01; + dist = (dist > e12) ? 
e12 : dist; + } + + return dist; +} + +template +std::tuple, vec3> +cross_backward(const vec3& a, const vec3& b, const vec3& grad_cross) { + const float grad_ax = -grad_cross.y * b.z + grad_cross.z * b.y; + const float grad_ay = grad_cross.x * b.z - grad_cross.z * b.x; + const float grad_az = -grad_cross.x * b.y + grad_cross.y * b.x; + const vec3 grad_a = vec3(grad_ax, grad_ay, grad_az); + + const float grad_bx = grad_cross.y * a.z - grad_cross.z * a.y; + const float grad_by = -grad_cross.x * a.z + grad_cross.z * a.x; + const float grad_bz = grad_cross.x * a.y - grad_cross.y * a.x; + const vec3 grad_b = vec3(grad_bx, grad_by, grad_bz); + + return std::make_tuple(grad_a, grad_b); +} + +template +vec3 normalize_backward(const vec3& a, const vec3& grad_normz) { + const float a_norm = norm(a) + vEpsilon; + const vec3 out = a / a_norm; + + const float grad_ax = grad_normz.x * (1.0f - out.x * out.x) / a_norm + + grad_normz.y * (-out.x * out.y) / a_norm + + grad_normz.z * (-out.x * out.z) / a_norm; + const float grad_ay = grad_normz.x * (-out.x * out.y) / a_norm + + grad_normz.y * (1.0f - out.y * out.y) / a_norm + + grad_normz.z * (-out.y * out.z) / a_norm; + const float grad_az = grad_normz.x * (-out.x * out.z) / a_norm + + grad_normz.y * (-out.y * out.z) / a_norm + + grad_normz.z * (1.0f - out.z * out.z) / a_norm; + return vec3(grad_ax, grad_ay, grad_az); +} + +// The backward pass for computing the squared distance of a point +// to the triangle (v0, v1, v2). 
+// +// Args: +// p: xyz coordinates of a point +// v0, v1, v2: xyz coordinates of the triangle vertices +// grad_dist: Float of the gradient wrt dist +// min_triangle_area: triangles less than this size are considered +// points/lines, IsInsideTriangle returns False +// +// Returns: +// tuple of gradients for the point and triangle: +// (float3 grad_p, float3 grad_v0, float3 grad_v1, float3 grad_v2) +// + +template +static std::tuple, vec3, vec3, vec3> +PointTriangle3DistanceBackward( + const vec3& p, + const vec3& v0, + const vec3& v1, + const vec3& v2, + const T& grad_dist, + const double min_triangle_area) { + const vec3 v2v0 = v2 - v0; + const vec3 v1v0 = v1 - v0; + const vec3 v0p = v0 - p; + vec3 raw_normal = cross(v2v0, v1v0); + const T norm_normal = norm(raw_normal); + vec3 normal = raw_normal / (norm_normal + vEpsilon); + + // p0 is the projection of p on the plane spanned by (v0, v1, v2) + // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal + const T t = dot(v0 - p, normal); + const vec3 p0 = p + t * normal; + const vec3 diff = t * normal; + + bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area); + + vec3 grad_p(0.0f, 0.0f, 0.0f); + vec3 grad_v0(0.0f, 0.0f, 0.0f); + vec3 grad_v1(0.0f, 0.0f, 0.0f); + vec3 grad_v2(0.0f, 0.0f, 0.0f); + + if ((is_inside) && (norm_normal > kEpsilon)) { + // derivative of dist wrt p + grad_p = -2.0f * grad_dist * t * normal; + // derivative of dist wrt normal + const vec3 grad_normal = 2.0f * grad_dist * t * (v0p + diff); + // derivative of dist wrt raw_normal + const vec3 grad_raw_normal = normalize_backward(raw_normal, grad_normal); + // derivative of dist wrt v2v0 and v1v0 + const auto grad_cross = cross_backward(v2v0, v1v0, grad_raw_normal); + const vec3 grad_cross_v2v0 = std::get<0>(grad_cross); + const vec3 grad_cross_v1v0 = std::get<1>(grad_cross); + grad_v0 = + grad_dist * 2.0f * t * normal - (grad_cross_v2v0 + grad_cross_v1v0); + grad_v1 = grad_cross_v1v0; + grad_v2 = grad_cross_v2v0; + 
} else { + const T e01 = PointLine3DistanceForward(p, v0, v1); + const T e02 = PointLine3DistanceForward(p, v0, v2); + const T e12 = PointLine3DistanceForward(p, v1, v2); + + if ((e01 <= e02) && (e01 <= e12)) { + // e01 is smallest + const auto grads = PointLine3DistanceBackward(p, v0, v1, grad_dist); + grad_p = std::get<0>(grads); + grad_v0 = std::get<1>(grads); + grad_v1 = std::get<2>(grads); + } else if ((e02 <= e01) && (e02 <= e12)) { + // e02 is smallest + const auto grads = PointLine3DistanceBackward(p, v0, v2, grad_dist); + grad_p = std::get<0>(grads); + grad_v0 = std::get<1>(grads); + grad_v2 = std::get<2>(grads); + } else if ((e12 <= e01) && (e12 <= e02)) { + // e12 is smallest + const auto grads = PointLine3DistanceBackward(p, v1, v2, grad_dist); + grad_p = std::get<0>(grads); + grad_v1 = std::get<1>(grads); + grad_v2 = std::get<2>(grads); + } + } + + return std::make_tuple(grad_p, grad_v0, grad_v1, grad_v2); +} diff --git a/pytorch3d_simplified/pytorch3d/csrc/utils/warp_reduce.cuh b/pytorch3d_simplified/pytorch3d/csrc/utils/warp_reduce.cuh new file mode 100644 index 0000000000000000000000000000000000000000..3c903019debf5db594a6c71e1296ccd764991736 --- /dev/null +++ b/pytorch3d_simplified/pytorch3d/csrc/utils/warp_reduce.cuh @@ -0,0 +1,94 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include + +// Helper functions WarpReduceMin and WarpReduceMax used in .cu files +// Starting in Volta, instructions are no longer synchronous within a warp. +// We need to call __syncwarp() to sync the 32 threads in the warp +// instead of all the threads in the block. 
+ +template +__device__ void +WarpReduceMin(scalar_t* min_dists, int64_t* min_idxs, const size_t tid) { + // s = 32 + if (min_dists[tid] > min_dists[tid + 32]) { + min_idxs[tid] = min_idxs[tid + 32]; + min_dists[tid] = min_dists[tid + 32]; + } + __syncwarp(); + // s = 16 + if (min_dists[tid] > min_dists[tid + 16]) { + min_idxs[tid] = min_idxs[tid + 16]; + min_dists[tid] = min_dists[tid + 16]; + } + __syncwarp(); + // s = 8 + if (min_dists[tid] > min_dists[tid + 8]) { + min_idxs[tid] = min_idxs[tid + 8]; + min_dists[tid] = min_dists[tid + 8]; + } + __syncwarp(); + // s = 4 + if (min_dists[tid] > min_dists[tid + 4]) { + min_idxs[tid] = min_idxs[tid + 4]; + min_dists[tid] = min_dists[tid + 4]; + } + __syncwarp(); + // s = 2 + if (min_dists[tid] > min_dists[tid + 2]) { + min_idxs[tid] = min_idxs[tid + 2]; + min_dists[tid] = min_dists[tid + 2]; + } + __syncwarp(); + // s = 1 + if (min_dists[tid] > min_dists[tid + 1]) { + min_idxs[tid] = min_idxs[tid + 1]; + min_dists[tid] = min_dists[tid + 1]; + } + __syncwarp(); +} + +template +__device__ void WarpReduceMax( + volatile scalar_t* dists, + volatile int64_t* dists_idx, + const size_t tid) { + if (dists[tid] < dists[tid + 32]) { + dists[tid] = dists[tid + 32]; + dists_idx[tid] = dists_idx[tid + 32]; + } + __syncwarp(); + if (dists[tid] < dists[tid + 16]) { + dists[tid] = dists[tid + 16]; + dists_idx[tid] = dists_idx[tid + 16]; + } + __syncwarp(); + if (dists[tid] < dists[tid + 8]) { + dists[tid] = dists[tid + 8]; + dists_idx[tid] = dists_idx[tid + 8]; + } + __syncwarp(); + if (dists[tid] < dists[tid + 4]) { + dists[tid] = dists[tid + 4]; + dists_idx[tid] = dists_idx[tid + 4]; + } + __syncwarp(); + if (dists[tid] < dists[tid + 2]) { + dists[tid] = dists[tid + 2]; + dists_idx[tid] = dists_idx[tid + 2]; + } + __syncwarp(); + if (dists[tid] < dists[tid + 1]) { + dists[tid] = dists[tid + 1]; + dists_idx[tid] = dists_idx[tid + 1]; + } + __syncwarp(); +}