Spaces:

paulpanwang
/

Diffsplat

Sleeping

App Files Files Community

Diffsplat / extensions /RaDe-GS /submodules /diff-gaussian-rasterization /cuda_rasterizer /auxiliary.h

paulpanwang

Upload folder using huggingface_hub

476e0f0 verified about 1 year ago

raw

history blame contribute delete

11.5 kB

	/*
	* Copyright (C) 2023, Inria
	* GRAPHDECO research group, https://team.inria.fr/graphdeco
	* All rights reserved.
	*
	* This software is free for non-commercial, research and evaluation use
	* under the terms of the LICENSE.md file.
	*
	* For inquiries contact george.drettakis@inria.fr
	*/

	#ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
	#define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED

	#include "config.h"
	#include "stdio.h"
	#include <glm/glm.hpp>

	// Number of threads in one block: product of the BLOCK_X and BLOCK_Y tile
	// dimensions (neither is defined in this header).
	#define BLOCK_SIZE (BLOCK_X * BLOCK_Y)
	// Number of 32-thread groups in one block (integer division).
	#define NUM_WARPS (BLOCK_SIZE/32)
	// Near/far clipping distances. These are double literals. Neither macro is
	// referenced in this header; note that in_frustum() compares against a
	// hard-coded 0.2f rather than NEAR_PLANE.
	#define NEAR_PLANE 0.2
	#define FAR_PLANE 100.0
	// Small float epsilon; presumably guards vector normalization - not used in
	// this header, confirm against the callers.
	#define NORMALIZE_EPS 1.0E-12F

	// NOTE(review): these look like channel indices into a per-pixel output
	// buffer that has OUTPUT_CHANNELS channels in total - they are not used in
	// this header, so confirm the layout against the rasterizer kernels.
	#define DEPTH_OFFSET 6
	#define ALPHA_OFFSET 7
	#define DISTORTION_OFFSET 8
	#define OUTPUT_CHANNELS 9

	// Capacity limits; the previous value of MAX_NUM_CONTRIBUTORS (256) is kept
	// below as a commented-out line. Neither limit is used in this header.
	// #define MAX_NUM_CONTRIBUTORS 256
	#define MAX_NUM_CONTRIBUTORS 512
	#define MAX_NUM_PROJECTED 256

	// Spherical harmonics coefficients
	// Device-side constant tables: two scalars (SH_C0, SH_C1), a 5-entry array
	// (SH_C2) and a 7-entry array (SH_C3).
	// NOTE(review): the entry counts (1, 1, 5, 7) suggest one table per SH degree
	// 0..3; the evaluation code lives outside this header - confirm there.
	__device__ const float SH_C0 = 0.28209479177387814f;
	__device__ const float SH_C1 = 0.4886025119029199f;
	__device__ const float SH_C2[] = {
	1.0925484305920792f,
	-1.0925484305920792f,
	0.31539156525252005f,
	-1.0925484305920792f,
	0.5462742152960396f
	};
	__device__ const float SH_C3[] = {
	-0.5900435899266435f,
	2.890611442640554f,
	-0.4570457994644658f,
	0.3731763325901154f,
	-0.4570457994644658f,
	1.445305721320277f,
	-0.5900435899266435f
	};

	// __device__ const float kernel_size = 0.1;
	// __device__ const float kernel_size = 0.0;

	// Computes ((v + 1) * S - 1) / 2 for a coordinate v and an extent of S.
	// Judging by the name this maps a normalized device coordinate to a pixel
	// coordinate along an axis of S pixels (v = -1 gives -0.5, v = +1 gives S - 0.5).
	// The literals are doubles, so the arithmetic is carried out in double
	// precision and narrowed to float only on return.
	__forceinline__ __device__ float ndc2Pix(float v, int S)
	{
		const double shifted = v + 1.0;
		return (shifted * S - 1.0) * 0.5;
	}

	// Computes the range of tiles overlapped by a square of half-width max_radius
	// centred on p, where a tile is BLOCK_X by BLOCK_Y units.
	//   p          centre position (same units as BLOCK_X / BLOCK_Y)
	//   max_radius half-width of the square
	//   rect_min   out: first tile index on each axis
	//   rect_max   out: tile index on each axis obtained by rounding the far edge up
	//   grid       tile-grid dimensions; every result is clamped to [0, grid.x] / [0, grid.y]
	__forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid)
	{
		// Lower corner: truncate (p - r) / BLOCK and clamp negatives to zero.
		const int first_tile_x = max((int)0, (int)((p.x - max_radius) / BLOCK_X));
		const int first_tile_y = max((int)0, (int)((p.y - max_radius) / BLOCK_Y));

		// Upper corner: adding BLOCK - 1 before dividing rounds the edge up.
		const int end_tile_x = max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X));
		const int end_tile_y = max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y));

		rect_min.x = min(grid.x, first_tile_x);
		rect_min.y = min(grid.y, first_tile_y);
		rect_max.x = min(grid.x, end_tile_x);
		rect_max.y = min(grid.y, end_tile_y);
	}

	// Applies the upper three rows of a 4x4 matrix to the point (p.x, p.y, p.z, 1).
	// The indexing treats `matrix` as 16 floats in column-major order: elements
	// 0-2, 4-6 and 8-10 multiply p, elements 12-14 are added as a translation.
	__forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix)
	{
		float3 result;
		result.x = matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12];
		result.y = matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13];
		result.z = matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14];
		return result;
	}

	// Multiplies a full 4x4 matrix with the homogeneous point (p.x, p.y, p.z, 1)
	// and returns all four components. The indexing treats `matrix` as 16 floats
	// in column-major order; no perspective division is performed here.
	__forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix)
	{
		float4 result;
		result.x = matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12];
		result.y = matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13];
		result.z = matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14];
		result.w = matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15];
		return result;
	}

	// Applies only the upper-left 3x3 part of a column-major 4x4 matrix to p.
	// Elements 12-14 are not read, so no translation is added.
	__forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix)
	{
		float3 result;
		result.x = matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z;
		result.y = matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z;
		result.z = matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z;
		return result;
	}

	// Applies the transpose of the upper-left 3x3 part of a column-major 4x4
	// matrix to p: each output component is the dot product of p with one
	// stored column (elements 0-2, 4-6, 8-10). No translation is added.
	__forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix)
	{
		float3 result;
		result.x = matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z;
		result.y = matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z;
		result.z = matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z;
		return result;
	}

	// Returns the z component of ((|v|^2 * I - v * v^T) / |v|^3) * dv, i.e. the
	// z entry of the Jacobian of v / |v| applied to dv.
	// No guard against |v| == 0: the division then yields inf/NaN.
	__forceinline__ __device__ float dnormvdz(float3 v, float3 dv)
	{
		const float len_sq = v.x * v.x + v.y * v.y + v.z * v.z;
		// 1 / |v|^3, obtained as 1 / sqrt(|v|^6).
		const float inv_len_cubed = 1.0f / sqrt(len_sq * len_sq * len_sq);
		return (-v.x * v.z * dv.x - v.y * v.z * dv.y + (len_sq - v.z * v.z) * dv.z) * inv_len_cubed;
	}

	// Returns ((|v|^2 * I - v * v^T) / |v|^3) * dv for 3-component vectors, i.e.
	// the Jacobian of v / |v| applied to dv.
	// No guard against |v| == 0: the division then yields inf/NaN.
	__forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv)
	{
		const float len_sq = v.x * v.x + v.y * v.y + v.z * v.z;
		// 1 / |v|^3, obtained as 1 / sqrt(|v|^6).
		const float inv_len_cubed = 1.0f / sqrt(len_sq * len_sq * len_sq);

		float3 grad;
		grad.x = ((+len_sq - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * inv_len_cubed;
		grad.y = (-v.x * v.y * dv.x + (len_sq - v.y * v.y) * dv.y - v.z * v.y * dv.z) * inv_len_cubed;
		grad.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (len_sq - v.z * v.z) * dv.z) * inv_len_cubed;
		return grad;
	}

	// Four-component overload: returns ((|v|^2 * I - v * v^T) / |v|^3) * dv.
	// The off-diagonal contribution for each component is formed as
	// v_i * (sum_j v_j * dv_j - v_i * dv_i).
	// No guard against |v| == 0: the division then yields inf/NaN.
	__forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv)
	{
		const float len_sq = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
		// 1 / |v|^3, obtained as 1 / sqrt(|v|^6).
		const float inv_len_cubed = 1.0f / sqrt(len_sq * len_sq * len_sq);

		// Component-wise products v_i * dv_i and their total.
		float4 prod = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w };
		const float prod_total = prod.x + prod.y + prod.z + prod.w;

		float4 grad;
		grad.x = ((len_sq - v.x * v.x) * dv.x - v.x * (prod_total - prod.x)) * inv_len_cubed;
		grad.y = ((len_sq - v.y * v.y) * dv.y - v.y * (prod_total - prod.y)) * inv_len_cubed;
		grad.z = ((len_sq - v.z * v.z) * dv.z - v.z * (prod_total - prod.z)) * inv_len_cubed;
		grad.w = ((len_sq - v.w * v.w) * dv.w - v.w * (prod_total - prod.w)) * inv_len_cubed;
		return grad;
	}

	// Logistic function 1 / (1 + e^(-x)), evaluated in single precision.
	__forceinline__ __device__ float sigmoid(float x)
	{
		const float decay = expf(-x);
		return 1.0f / (1.0f + decay);
	}

	// Tests whether point `idx` lies in front of the near-distance threshold and
	// writes its view-space position to p_view.
	//   idx          index of the point; coordinates are read from
	//                orig_points[3 * idx .. 3 * idx + 2]
	//   orig_points  packed xyz coordinates
	//   viewmatrix   16 floats, passed to transformPoint4x3
	//   projmatrix   unused at present (see below); kept so the signature is unchanged
	//   prefiltered  when true, a rejected point is treated as a fatal error
	//   p_view       out: transformPoint4x3(point, viewmatrix); written in all cases
	// Returns true when p_view.z > 0.2f, false otherwise. If the point is rejected
	// while `prefiltered` is set, a message is printed and the kernel traps.
	__forceinline__ __device__ bool in_frustum(int idx,
		const float* orig_points,
		const float* viewmatrix,
		const float* projmatrix,
		bool prefiltered,
		float3& p_view)
	{
		// The projected position was previously computed from projmatrix but only
		// fed a screen-space test (|x| > 1.3 or |y| > 1.3 in projected coordinates)
		// that is disabled, so the dead computation has been removed. Re-enabling
		// that test requires projecting the point with projmatrix again.
		(void)projmatrix;

		const float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };

		// Bring the point into view space; the depth test below uses its z.
		p_view = transformPoint4x3(p_orig, viewmatrix);

		if (p_view.z <= 0.2f)
		{
			if (prefiltered)
			{
				printf("Point is filtered although prefiltered is set. This shouldn't happen!");
				__trap();
			}
			return false;
		}
		return true;
	}

	namespace glm_modification
	{
	// Incorporates the transferSign, pythag, equal, and findEigenvaluesSymReal functions from the glm library,
	// with small modifications to findEigenvaluesSymReal to ensure numerical stability for big Gaussian kernels.
	// https://github.com/g-truc/glm/blob/33b4a621a697a305bc3a7610d290677b96beb181/glm/gtx/pca.inl
	// https://github.com/g-truc/glm/blob/33b4a621a697a305bc3a7610d290677b96beb181/glm/ext/scalar_relational.inl
	// Approximate equality: true when |x - y| does not exceed epsilon.
	template<typename genType>
	__forceinline__ __device__ bool equal(genType const& x, genType const& y, genType const& epsilon)
	{
		auto const gap = abs(x - y);
		return gap <= epsilon;
	}

	// Returns |v| when s >= 0 and -|v| otherwise.
	template<typename T>
	__forceinline__ __device__ static T transferSign(T const& v, T const& s)
	{
		if ((s) >= 0)
		{
			return glm::abs(v);
		}
		return -glm::abs(v);
	}

	// Computes sqrt(a^2 + b^2) by factoring out the larger magnitude, so only the
	// ratio smaller / larger is squared. Returns 0 when the larger magnitude is
	// within 1e-7 of zero.
	template<typename T>
	__forceinline__ __device__ static T pythag(T const& a, T const& b) {
		static const T epsilon = static_cast<T>(0.0000001);
		T const mag_a = glm::abs(a);
		T const mag_b = glm::abs(b);

		if(mag_a > mag_b) {
			// |a| dominates: |a| * sqrt(1 + (|b| / |a|)^2)
			T ratio = mag_b / mag_a;
			ratio *= ratio;
			return mag_a * glm::sqrt(static_cast<T>(1) + ratio);
		}

		// |b| >= |a|; a near-zero |b| means both inputs are near zero.
		if(glm_modification::equal<T>(mag_b, 0, epsilon)) {
			return static_cast<T>(0);
		}

		// |b| dominates: |b| * sqrt(1 + (|a| / |b|)^2)
		T ratio = mag_a / mag_b;
		ratio *= ratio;
		return mag_b * glm::sqrt(static_cast<T>(1) + ratio);
	}


	// Computes eigenvalues and eigenvectors of the D x D matrix covarMat (per the
	// function name, expected to be symmetric and real) in three stages:
	// Householder reduction, QL iteration, and output copy.
	//   covarMat         input matrix; it is copied into a local workspace and not modified
	//   outEigenvalues   out: receives d[0..D-1]
	//   outEigenvectors  out: outEigenvectors[i][j] receives workspace element a[j * D + i]
	// Returns D on success. Returns 0 if any QL pass needs more than MAX_ITER (30)
	// iterations; in that case neither output argument has been written.
	// All "is zero" tests go through glm_modification::equal with epsilon = 1e-7.
	template<glm::length_t D, typename T, glm::qualifier Q>
	__forceinline__ __device__ unsigned int findEigenvaluesSymReal
	(
	glm::mat<D, D, T, Q> const& covarMat,
	glm::vec<D, T, Q>& outEigenvalues,
	glm::mat<D, D, T, Q>& outEigenvectors
	)
	{

	T a[D * D]; // matrix -- input and workspace for algorithm (will be changed inplace)
	T d[D]; // diagonal elements
	T e[D]; // off-diagonal elements

	// Copy the input so that a[r * D + c] holds covarMat[c][r].
	for(glm::length_t r = 0; r < D; r++)
	for(glm::length_t c = 0; c < D; c++)
	a[(r) * D + (c)] = covarMat[c][r];

	// 1. Householder reduction.
	// Loop variables below are 1-based; every array access subtracts 1.
	glm::length_t l, k, j, i;
	T scale, hh, h, g, f;
	static const T epsilon = static_cast<T>(0.0000001);

	for(i = D; i >= 2; i--)
	{
	l = i - 1;
	h = scale = 0;
	if(l > 1)
	{
	// scale = sum of absolute values of the first l entries of row i.
	for(k = 1; k <= l; k++)
	{
	scale += glm::abs(a[(i - 1) * D + (k - 1)]);
	}
	if(glm_modification::equal<T>(scale, 0, epsilon))
	{
	e[i - 1] = a[(i - 1) * D + (l - 1)];
	}
	else
	{
	// Normalize the row by scale and accumulate its squared length in h.
	for(k = 1; k <= l; k++)
	{
	a[(i - 1) * D + (k - 1)] /= scale;
	h += a[(i - 1) * D + (k - 1)] * a[(i - 1) * D + (k - 1)];
	}
	f = a[(i - 1) * D + (l - 1)];
	// g takes the sign opposite to f.
	g = ((f >= 0) ? -glm::sqrt(h) : glm::sqrt(h));
	e[i - 1] = scale * g;
	h -= f * g;
	a[(i - 1) * D + (l - 1)] = f - g;
	f = 0;
	for(j = 1; j <= l; j++)
	{
	a[(j - 1) * D + (i - 1)] = a[(i - 1) * D + (j - 1)] / h;
	g = 0;
	for(k = 1; k <= j; k++)
	{
	g += a[(j - 1) * D + (k - 1)] * a[(i - 1) * D + (k - 1)];
	}
	for(k = j + 1; k <= l; k++)
	{
	g += a[(k - 1) * D + (j - 1)] * a[(i - 1) * D + (k - 1)];
	}
	e[j - 1] = g / h;
	f += e[j - 1] * a[(i - 1) * D + (j - 1)];
	}
	hh = f / (h + h);
	for(j = 1; j <= l; j++)
	{
	f = a[(i - 1) * D + (j - 1)];
	e[j - 1] = g = e[j - 1] - hh * f;
	for(k = 1; k <= j; k++)
	{
	a[(j - 1) * D + (k - 1)] -= (f * e[k - 1] + g * a[(i - 1) * D + (k - 1)]);
	}
	}
	}
	}
	else
	{
	e[i - 1] = a[(i - 1) * D + (l - 1)];
	}
	d[i - 1] = h;
	}
	d[0] = 0;
	e[0] = 0;
	// Second pass over the rows: d[i - 1] (still holding h from the loop above)
	// decides whether row i is applied to the leading l x l part of a; then the
	// diagonal entry moves into d and row/column i of a is reset to a unit vector.
	for(i = 1; i <= D; i++)
	{
	l = i - 1;
	if(!glm_modification::equal<T>(d[i - 1], 0, epsilon))
	{
	for(j = 1; j <= l; j++)
	{
	g = 0;
	for(k = 1; k <= l; k++)
	{
	g += a[(i - 1) * D + (k - 1)] * a[(k - 1) * D + (j - 1)];
	}
	for(k = 1; k <= l; k++)
	{
	a[(k - 1) * D + (j - 1)] -= g * a[(k - 1) * D + (i - 1)];
	}
	}
	}
	d[i - 1] = a[(i - 1) * D + (i - 1)];
	a[(i - 1) * D + (i - 1)] = 1;
	for(j = 1; j <= l; j++)
	{
	a[(j - 1) * D + (i - 1)] = a[(i - 1) * D + (j - 1)] = 0;
	}
	}

	// 2. Calculation of eigenvalues and eigenvectors (QL algorithm)
	glm::length_t m, iter;
	T s, r, p, dd, c, b;
	const glm::length_t MAX_ITER = 30;

	// Shift e down by one index and clear the last entry.
	for(i = 2; i <= D; i++)
	{
	e[i - 2] = e[i - 1];
	}
	e[D - 1] = 0;

	for(l = 1; l <= D; l++)
	{
	iter = 0;
	do
	{
	// Find the first m >= l whose off-diagonal entry e[m - 1] is within
	// epsilon of zero; m ends at D if none is found.
	// NOTE(review): dd is assigned here but never read in this function; the
	// break test uses only |e[m - 1]| against epsilon.
	for(m = l; m <= D - 1; m++)
	{
	dd = glm::abs(d[m - 1]) + glm::abs(d[m - 1 + 1]);
	if(glm_modification::equal<T>(glm::abs(e[m - 1]), 0, epsilon))
	break;
	}
	if(m != l)
	{
	if(iter++ == MAX_ITER)
	{
	return 0; // Too many iterations in FindEigenvalues
	}
	g = (d[l - 1 + 1] - d[l - 1]) / (2 * e[l - 1]);
	r = pythag<T>(g, 1);
	g = d[m - 1] - d[l - 1] + e[l - 1] / (g + transferSign(r, g));
	s = c = 1;
	p = 0;
	// Sweep from m - 1 down to l, updating d, e and the columns of a.
	for(i = m - 1; i >= l; i--)
	{
	f = s * e[i - 1];
	b = c * e[i - 1];
	e[i - 1 + 1] = r = pythag(f, g);
	if(glm_modification::equal<T>(r, 0, epsilon))
	{
	d[i - 1 + 1] -= p;
	e[m - 1] = 0;
	break;
	}
	s = f / r;
	c = g / r;
	g = d[i - 1 + 1] - p;
	r = (d[i - 1] - g) * s + 2 * c * b;
	d[i - 1 + 1] = g + (p = s * r);
	g = c * r - b;
	for(k = 1; k <= D; k++)
	{
	f = a[(k - 1) * D + (i - 1 + 1)];
	a[(k - 1) * D + (i - 1 + 1)] = s * a[(k - 1) * D + (i - 1)] + c * f;
	a[(k - 1) * D + (i - 1)] = c * a[(k - 1) * D + (i - 1)] - s * f;
	}
	}
	// The sweep broke out early on r ~ 0: skip the final update and retry.
	if(glm_modification::equal<T>(r, 0, epsilon) && (i >= l))
	continue;
	d[l - 1] -= p;
	e[l - 1] = g;
	e[m - 1] = 0;
	}
	} while(m != l);
	}

	// 3. output
	for(i = 0; i < D; i++)
	outEigenvalues[i] = d[i];
	// outEigenvectors[i] is filled from column i of the row-major workspace a.
	for(i = 0; i < D; i++)
	for(j = 0; j < D; j++)
	outEigenvectors[i][j] = a[(j) * D + (i)];

	return D;
	}
	}

	/*
	 * CHECK_CUDA(A, debug): executes statement A; when `debug` is true it then
	 * calls cudaDeviceSynchronize() and, if that does not return cudaSuccess,
	 * prints file, line and error string to std::cerr and throws
	 * std::runtime_error carrying the same error string.
	 *
	 * Notes for users of the macro:
	 *  - The expansion is "A; if(debug) { ... }" with no enclosing braces or
	 *    do/while wrapper, so it is two statements: do not use it as the
	 *    unbraced body of an if/else/for.
	 *  - It uses std::cerr and std::runtime_error; the includes visible at the
	 *    top of this header are only config.h, stdio.h and glm, so the including
	 *    file must provide <iostream> and <stdexcept> (unless config.h does).
	 *  - When `debug` is false no error checking is performed at all.
	 */
	#define CHECK_CUDA(A, debug) \
	A; if(debug) { \
	auto ret = cudaDeviceSynchronize(); \
	if (ret != cudaSuccess) { \
	std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \
	throw std::runtime_error(cudaGetErrorString(ret)); \
	} \
	}

	#endif