// SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 #pragma once #include #include #include #include #ifndef _GEOMETRY_NO_TORCH #include #endif #include "cuda_intellisense.cuh" #ifndef __NVCC__ #define SORT_ALGO std::sort #define SWAP std::swap template using tuple_t = std::tuple; #else #include #include #define SORT_ALGO thrust::sort #define SWAP thrust::swap template using tuple_t = thrust::tuple; #endif template struct Point_ { typedef T inner_type; T X, Y; Point_() = default; __any_device__ Point_(T x, T y) : X(x), Y(y) {} __any_device__ Point_(T *ptr) : X(ptr[0]), Y(ptr[1]) {} #ifndef _GEOMETRY_NO_TORCH template __any_device__ Point_(const torch::TensorAccessor &accessor) : X(accessor[0]), Y(accessor[1]) {} template __any_device__ Point_(const torch::PackedTensorAccessor64 &accessor) : X(accessor[0]), Y(accessor[1]) {} #endif __any_device__ Point_ &operator+=(const Point_ &other); __any_device__ Point_ &operator-=(const Point_ &other); __any_device__ Point_ &operator*=(const Point_ &other); __any_device__ Point_ &operator/=(const Point_ &other); template __any_device__ Point_ &operator/=(W w); template __any_device__ Point_ &operator*=(W w); __any_device__ Point_ operator-() { return { -X, -Y }; } __any_device__ T Sum() const { return X + Y; } __any_device__ T Angle() const; __any_device__ void swap(Point_ &other) noexcept { SWAP(X, other.X); SWAP(Y, other.Y); } }; template __lib_inline__ __any_device__ void swap(Point_ &a, Point_ &b) { a.swap(b); } template __any_device__ __lib_inline__ T Point_::Angle() const { #ifndef __NVCC__ using std::atan2; #endif return atan2(Y, X); } template __any_device__ __lib_inline__ Point_ min(const Point_ &a, const Point_ &b) { #ifndef __NVCC__ using std::min; #endif return { min(a.X, b.X), min(a.Y, b.Y) }; } template __any_device__ __lib_inline__ Point_ max(const Point_ &a, const Point_ &b) { #ifndef __NVCC__ using std::max; #endif return { max(a.X, b.X), max(a.Y, b.Y) }; } template struct AABB_ { typedef T inner_type; T X; T Y; T MaxX; T MaxY; AABB_() = default; __any_device__ AABB_(T x, T y, T maxX, T maxY) : X(x), Y(y), MaxX(maxX), MaxY(maxY) {} __any_device__ bool Contains(const Point_ &p) const { return p.X >= X && p.X < MaxX && p.Y >= Y && p.Y < MaxY; } __any_device__ __lib_inline__ AABB_ Union(const AABB_ &other) const { #ifndef __NVCC__ using std::min; using std::max; #endif T minX = min(X, other.X); T maxX = max(MaxX, other.MaxX); T minY = min(Y, other.Y); T maxY = max(MaxY, other.MaxY); return { minX, minY, maxX, maxY }; } __any_device__ AABB_ &operator-=(const Point_ &offset) { X -= offset.X; MaxX -= offset.X; Y -= offset.Y; MaxY -= offset.Y; return *this; } __any_device__ __lib_inline__ T Width() const { return MaxX - X; } __any_device__ __lib_inline__ T Height() const { return MaxY - Y; } __any_device__ __lib_inline__ T Area() const { return Width() * Height(); } __lib_inline__ T &operator[] (int64_t idx) { static_assert(std::is_standard_layout>::value, "This function is only valid for standard layout"); return (&X)[idx]; } __lib_inline__ T operator[] (int64_t idx) const { static_assert(std::is_standard_layout>::value, "This function is only valid for standard layout"); return (&X)[idx]; } __any_device__ __lib_inline__ AABB_ Intersection(const AABB_ &other) const { #ifndef __NVCC__ using std::min; using std::max; #endif T minX = max(X, other.X); T minY = max(Y, other.Y); T maxX = min(MaxX, other.MaxX); T maxY = min(MaxY, other.MaxY); // Prevent negative area minX = min(minX, maxX); minY = min(minY, maxY); return { minX, minY, maxX, maxY }; } __any_device__ __lib_inline__ T IntersectionArea(const AABB_ &other) const { return Intersection(other).Area(); } }; template struct QuadBase_ { typedef T inner_type; __any_device__ AABB_ Bounds() const; __any_device__ bool Contains(const Point_ &p) const; __any_device__ T Area() const; __any_device__ T Height() const; __any_device__ T Width() const; template __any_device__ T IntersectionArea(const QuadBase_ &other) const; template __any_device__ T IOU(const QuadBase_ &other) const; template __any_device__ T IOU_UpperBound(const QuadBase_ &other) const; __any_device__ Point_ Center() const; template __any_device__ /* Returns 3 geometric associations between the two quads: 0: The percent shared area between this and other relative to this (e.g. if other contains this, then it returns 1) 1: The percent shared area between other and this relative to other (e.g. if this contains other, then it return 1) 2: The IOU of the two quads */ tuple_t RegionSizes(const QuadBase_ &other) const; template __any_device__ tuple_t RegionSizes_UpperBound(const QuadBase_ &other) const; __any_device__ Derived &operator/=(T val) { auto rcp = 1 / val; return *this *= rcp; } __any_device__ Derived &operator*=(T val) { auto dThis = static_cast(this); #pragma unroll for (size_t i = 0; i < 4; ++i) { dThis->Vertices[i] *= val; } return *dThis; } friend auto begin(const QuadBase_ &q) { return static_cast(q).Vertices; } friend auto begin(QuadBase_& q) { return static_cast(q).Vertices; } friend auto end(const QuadBase_ &q) { return static_cast(q).Vertices + 4; } friend auto end(QuadBase_ &q) { return static_cast(q).Vertices + 4; } }; template struct Quad_ : QuadBase_> { Point_ *Vertices = nullptr; Quad_() = default; __any_device__ Quad_(T *dataPtr) : Vertices(reinterpret_cast*>(dataPtr)) {} __any_device__ Quad_(Point_ *dataPtr) : Vertices(dataPtr) {} template __any_device__ __lib_inline__ const Point_ &operator[](index_t offset) const { return Vertices[offset]; } template __any_device__ __lib_inline__ Point_ &operator[](index_t offset) { return Vertices[offset]; } }; template struct InPlaceQuad_ : public QuadBase_> { Point_ Vertices[4]; InPlaceQuad_() = default; __any_device__ InPlaceQuad_(const T *dataPtr) { #if defined(__NVCC__) T *pVals = reinterpret_cast(Vertices); #pragma unroll for (uint32_t i = 0; i < 8; ++i) { pVals[i] = dataPtr[i]; } #else using std::copy; copy(dataPtr, dataPtr + 8, reinterpret_cast(Vertices)); #endif } __any_device__ InPlaceQuad_(const Point_ *dataPtr) { #if defined(__NVCC__) #pragma unroll for (uint32_t i = 0; i < 4; ++i) { Vertices[i] = dataPtr[i]; } #else using std::copy; copy(dataPtr, dataPtr + 4, Vertices); #endif } template __any_device__ __lib_inline__ const Point_ &operator[](index_t v) const { return Vertices[v]; } template __any_device__ __lib_inline__ Point_ &operator[](index_t v) { return Vertices[v]; } }; template struct PolygonBase_ { typedef T inner_type; __any_device__ AABB_ Bounds() const; __any_device__ bool Contains(const Point_ &p) const; __any_device__ T EdgeLength() const; __any_device__ Point_ Center() const; __any_device__ T Area() const; }; template struct Polygon_ : PolygonBase_> { Point_ *Vertices = nullptr; size_t Count = 0; Polygon_() = default; __any_device__ Polygon_(T *dataPtr, size_t vertexCount) : Vertices(reinterpret_cast*>(dataPtr)), Count(vertexCount) {} __any_device__ Polygon_(Point_ *dataPtr, size_t vertexCount) : Vertices(dataPtr), Count(vertexCount) {} __any_device__ const Point_ &operator[](size_t offset) const { return Vertices[offset]; } __any_device__ Point_ &operator[](size_t offset) { return Vertices[offset]; } }; template struct Segment_ { Point_ A, B; Segment_() = default; __any_device__ Segment_(const Point_ &a, const Point_ &b) : A(a), B(b) {} __any_device__ T Length() const; __any_device__ T LengthSq() const; __any_device__ bool Intersection(const Segment_ &other, Point_ &out_ptAlong) const; }; template __any_device__ __lib_inline__ Point_ operator+(const Point_ &a, const Point_ &b) { return { a.X + b.X, a.Y + b.Y }; } template __any_device__ __lib_inline__ Point_ operator-(const Point_ &a, const Point_ &b) { return { a.X - b.X, a.Y - b.Y }; } template __any_device__ __lib_inline__ Point_ operator*(W scale, const Point_ &p) { return { scale * p.X, scale * p.Y }; } template __any_device__ __lib_inline__ Point_ operator*(const Point_ &p, W scale) { return { scale * p.X, scale * p.Y }; } template __any_device__ __lib_inline__ Point_ operator/(const Point_ &p, W divisor) { return { p.X / divisor, p.Y / divisor }; } template __any_device__ __lib_inline__ Point_ operator*(const Point_ &a, const Point_ &b) { return { a.X * b.X, a.Y * b.Y }; } template __any_device__ __lib_inline__ Point_ operator-(const Point_ &p, W v) { return { p.X - v, p.Y - v }; } template __any_device__ __lib_inline__ Point_ &Point_::operator+=(const Point_ &p) { X = X + p.X; Y = Y + p.Y; return *this; } template __any_device__ __lib_inline__ Point_ &Point_::operator-=(const Point_ &p) { X = X - p.X; Y = Y - p.Y; return *this; } template __any_device__ __lib_inline__ Point_ &Point_::operator*=(const Point_ &p) { X = X * p.X; Y = Y * p.Y; return *this; } template __any_device__ __lib_inline__ Point_ &Point_::operator/=(const Point_ &p) { X = X / p.X; Y = Y / p.Y; return *this; } template template __any_device__ __lib_inline__ Point_ &Point_::operator/=(W val) { // TODO: This can be more efficient for float types by computing the reciprocal X /= val; Y /= val; return *this; } template template __any_device__ __lib_inline__ Point_ &Point_::operator*=(W val) { X *= val; Y *= val; return *this; } template __any_device__ __lib_inline__ T dot(const Point_ &a, const Point_ &b) { return a.X * b.X + a.Y * b.Y; } template __any_device__ __lib_inline__ T dot(const Point_ &p) { return dot(p, p); } template __any_device__ __lib_inline__ T length(const Point_ &p) { #ifndef __NVCC__ using std::sqrt; #endif return sqrt(dot(p)); } template __any_device__ __lib_inline__ Point_ normalize(const Point_ &p) { static constexpr T epsilon = std::numeric_limits::epsilon(); auto len = length(p) + epsilon; return { p.X / len, p.Y / len }; } template __any_device__ __lib_inline__ Point_ ortho_2d(const Point_ &p) { return { -p.Y, p.X }; } template __host__ __lib_inline__ std::ostream &operator<<(std::ostream &os, const Point_ &p) { return os << "(" << p.X << ", " << p.Y << ")"; } template __host__ __lib_inline__ std::ostream &operator<<(std::ostream &os, const AABB_ &b) { return os << "[(" << b.X << ", " << b.Y << "), (" << b.MaxX << ", " << b.MaxY << ")]"; } template __host__ __lib_inline__ std::ostream &operator<<(std::ostream &os, const Segment_ &s) { return os << "[(" << s.A.X << ", " << s.A.Y << "), (" << s.B.X << ", " << s.B.Y << ")]"; } template __host__ __lib_inline__ std::ostream &operator<<(std::ostream &os, const Quad_ &q) { os << "[" << q.Vertices[0]; for (size_t i = 1; i < 4; ++i) { os << ", " << q.Vertices[i]; } return os << "]"; } template __any_device__ __lib_inline__ int _signum(T val) { return (T(0) < val) - (val < T(0)); } template __any_device__ __lib_inline__ T sign(const Point_ &p1, const Point_ &p2, const Point_ &p3) { T ret = (p1.X - p3.X) * (p2.Y - p3.Y) - (p2.X - p3.X) * (p1.Y - p3.Y); auto sgn = _signum(ret); return sgn; } template __any_device__ __lib_inline__ T Segment_::Length() const { #ifndef __NVCC__ using std::sqrt; #endif return sqrt(LengthSq()); } template __any_device__ __lib_inline__ T Segment_::LengthSq() const { return dot(B - A); } template __any_device__ inline bool Segment_::Intersection(const Segment_ &other, Point_ &out_ptAlong) const { auto p1 = A, p2 = B, p3 = other.A, p4 = other.B; auto denom = (p4.Y - p3.Y) * (p2.X - p1.X) - (p4.X - p3.X) * (p2.Y - p1.Y); if (abs(denom) < 1e-8) { return false; } auto numer = (p4.X - p3.X) * (p1.Y - p3.Y) - (p4.Y - p3.Y) * (p1.X - p3.X); auto t = numer / denom; auto Bnumer = (p2.X - p1.X) * (p1.Y - p3.Y) - (p2.Y - p1.Y) * (p1.X - p3.X); auto Bt = Bnumer / denom; if (t < 0 || t > 1 || Bt < 0 || Bt > 1) { return false; } out_ptAlong = A + t * (B - A); return true; } template __any_device__ auto quad_center(const quad_t &quad) -> Point_ { typedef typename quad_t::inner_type T; Point_ center = quad[0]; for (size_t i = 1; i < 4; ++i) { center += quad[i]; } return center / T{ 4 }; } template __any_device__ Point_ QuadBase_::Center() const { return quad_center(static_cast(*this)); } template __any_device__ auto quad_bounds(const quad_t &quad) -> AABB_ { #ifndef __NVCC__ using std::min; using std::max; #endif auto minP = quad[0]; auto maxP = minP; for (size_t i = 1; i < 4; ++i) { auto qp = quad[i]; minP = min(minP, qp); maxP = max(maxP, qp); } return { minP.X, minP.Y, maxP.X, maxP.Y }; } template __any_device__ AABB_ QuadBase_::Bounds() const { return quad_bounds(static_cast(*this)); } template __any_device__ inline bool quad_contains(const Quad_t &quad, const point_t &pt) { #ifndef __NVCC__ using std::abs; #endif // Checks that the point lies on the interior side of each half plane auto d1 = sign(pt, quad[0], quad[1]); auto d2 = sign(pt, quad[1], quad[2]); auto d3 = sign(pt, quad[2], quad[3]); auto d4 = sign(pt, quad[3], quad[0]); // bool has_neg = (d1 < 0) || (d2 < 0) || (d3 < 0) || (d4 < 0); // bool has_pos = (d1 > 0) || (d2 > 0) || (d3 > 0) || (d4 > 0); int tot = d1 + d2 + d3 + d4; // return !(has_neg && has_pos); return abs(tot) == 4; } template __any_device__ __lib_inline__ bool QuadBase_::Contains(const Point_ &pt) const { return quad_contains(static_cast(*this), pt); } template __any_device__ inline auto shoelace_area(const PtList &points, size_t numPts, bool isSigned=false) -> decltype(points[0].X) { #ifndef __NVCC__ using std::abs; #endif decltype(points[0].X) area = 0; size_t j = numPts - 1; for (size_t i = 0; i < numPts; ++i) { auto Pi = points[i]; auto Pj = points[j]; area += (Pj.X + Pi.X) * (Pj.Y - Pi.Y); j = i; } area = area / 2; if (! isSigned) { area = abs(area); } return area; } template __any_device__ __lib_inline__ T QuadBase_::Height() const { auto &d = static_cast(*this); auto h1 = Segment_(d[1], d[2]).Length(); auto h2 = Segment_(d[3], d[0]).Length(); return (h1 + h2) / 2; } template __any_device__ __lib_inline__ T QuadBase_::Width() const { auto &d = static_cast(*this); auto w1 = Segment_(d[0], d[1]).Length(); auto w2 = Segment_(d[3], d[2]).Length(); return (w1 + w2) / 2; } // A quad can be defined as the sum of the area of two triangles template __any_device__ inline T QuadBase_::Area() const { // auto vertices = static_cast(this)->Vertices; return shoelace_area(static_cast(*this), 4); } template __any_device__ inline auto intersection_area(const Quad_t1 &quadsA, const Quad_t2 &quadsB) -> typename Quad_t1::inner_type { #ifndef __NVCC__ using std::atan2; #endif typedef typename Quad_t1::inner_type T; static const size_t MAX_PTS = 32; Point_ points[MAX_PTS], sortedPoints[MAX_PTS]; T angles[MAX_PTS]; size_t indices[MAX_PTS]; size_t numPts = 0; auto addPt = [&] (const Point_ &p) { points[numPts] = p; ++numPts; }; for (size_t i = 0; i < 4; ++i) { Point_ aPt = quadsA[i]; Point_ bPt = quadsB[i]; if (quadsA.Contains(bPt)) { addPt(bPt); } if (quadsB.Contains(aPt)) { addPt(aPt); } } for (size_t i = 0; i < 4; ++i) { Segment_ segA{ quadsA[i], quadsA[(i + 1) % 4] }; for (size_t j = 0; j < 4; ++j) { Segment_ segB{ quadsB[j], quadsB[(j + 1) % 4] }; Point_ ptAlong; if (segA.Intersection(segB, ptAlong)) { addPt(ptAlong); } } } if (numPts == 0) { return 0; } Point_ center{ 0, 0 }; for (size_t i = 0; i < numPts; ++i) { center += points[i]; } center /= numPts; for (size_t i = 0; i < numPts; ++i) { points[i] -= center; angles[i] = atan2(points[i].Y, points[i].X); indices[i] = i; } // Perform an argsort over the angles SORT_ALGO(indices, indices + numPts, [&] (size_t a, size_t b) { return angles[a] < angles[b]; } ); for (size_t i = 0; i < numPts; ++i) { sortedPoints[i] = points[indices[i]]; } // Finally, we can compute the area of this polygon using the shoelace formula T area = shoelace_area(sortedPoints, numPts); return area; } template template __any_device__ __lib_inline__ T QuadBase_::IntersectionArea(const QuadBase_ &other) const { return intersection_area( static_cast(*this), static_cast(other) ); } template __any_device__ __lib_inline__ auto geometry_iou(const T1 &a, const T2 &b) -> decltype(a.Area()) { auto aArea = a.Area(); auto bArea = b.Area(); auto ixArea = a.IntersectionArea(b); auto unionArea = aArea + bArea - ixArea; return ixArea / unionArea; } template template __any_device__ __lib_inline__ T QuadBase_::IOU(const QuadBase_ &other) const { return geometry_iou( static_cast(*this), static_cast(other) ); } template template __any_device__ __lib_inline__ T QuadBase_::IOU_UpperBound(const QuadBase_ &other) const { return geometry_iou( Bounds(), other.Bounds() ); } template __any_device__ __lib_inline__ auto geometry_region_sizes(const T1 &a, const T2 &b) -> tuple_t { auto aArea = a.Area(); auto bArea = b.Area(); auto ixArea = a.IntersectionArea(b); auto unionArea = aArea + bArea - ixArea; auto iou = ixArea / unionArea; return { ixArea / aArea, ixArea / bArea, iou }; } template template __any_device__ __lib_inline__ tuple_t QuadBase_::RegionSizes(const QuadBase_ &other) const { return geometry_region_sizes( static_cast(*this), static_cast(other) ); } template template __any_device__ __lib_inline__ tuple_t QuadBase_::RegionSizes_UpperBound(const QuadBase_ &other) const { return geometry_region_sizes( Bounds(), other.Bounds() ); } template __any_device__ auto polygon_bounds(const polygon_t &poly) -> AABB_ { #ifndef __NVCC__ using std::min; using std::max; #endif auto minP = poly[0]; auto maxP = minP; for (size_t i = 1; i < poly.Count; ++i) { auto qp = poly[i]; minP = min(minP, qp); maxP = max(maxP, qp); } return { minP.X, minP.Y, maxP.X, maxP.Y }; } template __any_device__ AABB_ PolygonBase_::Bounds() const { return polygon_bounds(static_cast(*this)); } template __any_device__ bool polygon_contains(const polygon_t &poly, const point_t &pt) { typedef typename polygon_t::inner_type T; // Some arbitrary segment. Technically this should be a ray, but functionally this will work Segment_ testSeg{ pt, { -1e6, -2e6 }}; Point_ trash; int32_t ixCount = 0; for (size_t i = 0; i < poly.Count; ++i) { Segment_ polySeg{ poly[i], poly[(i + 1) % poly.Count] }; if (testSeg.Intersection(polySeg, trash)) { ++ixCount; } } // If there are an odd number of intersections, then the point is inside return (ixCount % 2) == 1; } template __any_device__ bool PolygonBase_::Contains(const Point_ &pt) const { return polygon_contains(static_cast(*this), pt); } template __any_device__ auto polygon_edge_length(const polygon_t &poly) -> typename polygon_t::inner_type { typedef typename polygon_t::inner_type T; T ret = 0; for (size_t i = 0; i < poly.Count; ++i) { Segment_ seg{ poly[i], poly[(i + 1) % poly.Count] }; ret += seg.Length(); } return ret; } template __any_device__ T PolygonBase_::EdgeLength() const { return polygon_edge_length(static_cast(*this)); } template __any_device__ auto polygon_center(const polygon_t &poly) -> Point_ { typedef typename polygon_t::inner_type T; T cx = 0, cy = 0, a = 0; size_t j = poly.Count - 1; for (size_t i = 0; i < poly.Count; ++i) { Point_ p0 = poly[i]; Point_ p1 = poly[j]; T common = (p0.X * p1.Y - p1.X * p0.Y); cx += (p0.X + p1.X) * common; cy += (p0.Y + p1.Y) * common; a += common; j = i; } a /= 2; Point_ center{ cx / (6 * a), cy / (6 * a) }; return center; } template __any_device__ Point_ PolygonBase_::Center() const { return polygon_center(static_cast(*this)); } template __any_device__ T PolygonBase_::Area() const { const Derived &dThis = static_cast(*this); return shoelace_area(dThis, dThis.Count); } template __any_device__ Point_ nearest_point_on_segment(const Point_ &pt, const Segment_ &seg) { #ifndef __NVCC__ using std::max; using std::min; #endif const T l2 = seg.LengthSq(); if (l2 == 0.0) { return seg.A; } const auto v = seg.A; const auto w = seg.B; // Consider the line extending the segment, parameterized as v + t*(w-v) // Find projection of point p onto the line auto t = dot(pt - v, w - v) / l2; // Clamp between t=0 and t=1 t = max(static_cast(0), min(static_cast(1), t)); const auto projection = v + t * (w - v); return projection; } template __any_device__ Segment_ shortest_line_between_segments(const Segment_ &a, const Segment_ &b) { Segment_ segs[] = { { a.A, nearest_point_on_segment(a.A, b) }, { a.B, nearest_point_on_segment(a.B, b) }, { nearest_point_on_segment(b.A, a), b.A }, { nearest_point_on_segment(b.B, a), b.B } }; T minDist = std::numeric_limits::max(); size_t idx; #pragma unroll for (size_t i = 0; i < 4; ++i) { T dist = segs[i].LengthSq(); if (dist < minDist) { minDist = dist; idx = i; } } return segs[idx]; } // Find the distance between a point and the nearest point along the specified segment template __any_device__ T distance_to_segment(const Point_ &pt, const Segment_ &seg) { auto projection = nearest_point_on_segment(pt, seg); auto dist = length(pt - projection); return dist; }