instant-ngp build

7873319 about 3 years ago

6.64 kB

	//
	// Generated by NVIDIA NVVM Compiler
	//
	// Compiler Build ID: CL-29618528
	// Cuda compilation tools, release 11.2, V11.2.152
	// Based on NVVM 7.0.1
	//

	// Module-level directives: PTX ISA version 7.2, target architecture sm_52,
	// 64-bit addresses.
	.version 7.2
	.target sm_52
	.address_size 64

	// .globl __raygen__rg
	// 32-byte parameter block in constant space. __raygen__rg reads it as four
	// 64-bit values:
	//   params+0  : address of an array of 12-byte records (three f32 each)
	//   params+8  : address of a second array of 12-byte records (three f32 each)
	//   params+16 : address of an array of 36-byte records (nine f32 each)
	//   params+24 : 64-bit value forwarded to _optix_trace_typed_32
	//               (presumably the traversable handle -- confirm against the
	//               host-side launch-params struct)
	.const .align 8 .b8 params[32];
	// Per their mangled names these are function-local statics of
	// tcnn::gaussian_cdf_approx(float, float) and
	// tcnn::gaussian_cdf_approx_derivative(float, float). Both are initialised
	// to 0f3F4ABDDD (approximately 0.79196). Neither symbol is referenced by the
	// entry points visible in this file.
	.global .align 4 .f32 _ZZN4tcnn19gaussian_cdf_approxEffE20MAGIC_SIGMOID_FACTOR = 0f3F4ABDDD;
	.global .align 4 .f32 _ZZN4tcnn30gaussian_cdf_approx_derivativeEffE20MAGIC_SIGMOID_FACTOR = 0f3F4ABDDD;
	// Per its mangled name this is the function-local static `directions` of
	// ngp::sobol(unsigned int, unsigned int), emitted as 640 raw bytes with
	// 4-byte alignment. Presumably 5 x 32 little-endian 32-bit words (the first
	// four bytes 0,0,0,128 would then read as 0x80000000) -- confirm against the
	// C++ source. The table is not referenced by the entry points visible in
	// this file.
	.global .align 4 .b8 _ZZN3ngp5sobolEjjE10directions[640] = {0, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 160, 0, 0, 0, 240, 0, 0, 0, 136, 0, 0, 0, 204, 0, 0, 0, 170, 0, 0, 0, 255, 0, 0, 128, 128, 0, 0, 192, 192, 0, 0, 160, 160, 0, 0, 240, 240, 0, 0, 136, 136, 0, 0, 204, 204, 0, 0, 170, 170, 0, 0, 255, 255, 0, 128, 0, 128, 0, 192, 0, 192, 0, 160, 0, 160, 0, 240, 0, 240, 0, 136, 0, 136, 0, 204, 0, 204, 0, 170, 0, 170, 0, 255, 0, 255, 128, 128, 128, 128, 192, 192, 192, 192, 160, 160, 160, 160, 240, 240, 240, 240, 136, 136, 136, 136, 204, 204, 204, 204, 170, 170, 170, 170, 255, 255, 255, 255, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 96, 0, 0, 0, 144, 0, 0, 0, 232, 0, 0, 0, 92, 0, 0, 0, 142, 0, 0, 0, 197, 0, 0, 128, 104, 0, 0, 192, 156, 0, 0, 96, 238, 0, 0, 144, 85, 0, 0, 104, 128, 0, 0, 156, 192, 0, 0, 238, 96, 0, 0, 85, 144, 0, 128, 128, 232, 0, 192, 192, 92, 0, 96, 96, 142, 0, 144, 144, 197, 0, 232, 104, 104, 0, 92, 156, 156, 0, 142, 238, 238, 0, 197, 85, 85, 128, 232, 0, 128, 192, 92, 0, 192, 96, 142, 0, 96, 144, 197, 0, 144, 104, 104, 0, 232, 156, 156, 0, 92, 238, 238, 0, 142, 85, 85, 0, 197, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 32, 0, 0, 0, 80, 0, 0, 0, 248, 0, 0, 0, 116, 0, 0, 0, 162, 0, 0, 0, 147, 0, 0, 128, 216, 0, 0, 64, 37, 0, 0, 224, 89, 0, 0, 208, 230, 0, 0, 8, 120, 0, 0, 12, 180, 0, 0, 2, 130, 0, 0, 5, 195, 0, 128, 143, 32, 0, 64, 71, 81, 0, 32, 234, 251, 0, 48, 217, 117, 0, 136, 133, 160, 0, 84, 78, 145, 0, 158, 231, 219, 0, 109, 219, 37, 128, 0, 128, 88, 192, 0, 64, 229, 32, 0, 224, 121, 80, 0, 208, 182, 248, 0, 8, 128, 116, 0, 12, 192, 162, 0, 2, 32, 147, 0, 5, 80, 0, 
0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 176, 0, 0, 0, 248, 0, 0, 0, 220, 0, 0, 0, 122, 0, 0, 0, 157, 0, 0, 128, 90, 0, 0, 192, 47, 0, 0, 96, 161, 0, 0, 176, 240, 0, 0, 136, 218, 0, 0, 196, 111, 0, 0, 98, 129, 0, 0, 187, 64, 0, 128, 135, 34, 0, 192, 201, 179, 0, 160, 101, 251, 0, 208, 178, 221, 0, 40, 2, 120, 0, 60, 11, 156, 0, 182, 15, 90, 0, 219, 13, 45, 128, 128, 135, 162, 64, 192, 201, 243, 32, 160, 101, 219, 176, 208, 178, 109, 248, 40, 2, 128, 220, 60, 11, 64, 122, 182, 15, 32, 157, 219, 13, 176};

	// Ray-generation entry point.
	//
	// For launch index i (x component only) this program:
	//   1. loads a float triple from the array at params+0 and another from the
	//      array at params+8, both at byte offset i*12;
	//   2. issues one _optix_trace_typed_32 call using those triples;
	//   3. overwrites the params+0 triple with (triple0 + triple1 * t), where t
	//      is the second value returned by the trace call reinterpreted as f32;
	//   4. unless the first returned value equals -1, reads the 36-byte record
	//      at index <first returned value> of the array at params+16, computes
	//      cross(v1 - v0, v2 - v0) of its three float triples, normalises it
	//      when its squared length is strictly positive, and overwrites the
	//      params+8 triple with the result.
	//
	// Presumably triple0/triple1 are ray origin/direction and the 36-byte
	// records are triangles, so step 4 writes the hit surface normal -- confirm
	// against the C++ source.
	.visible .entry __raygen__rg()
	{
	.reg .pred %p<3>;
	.reg .f32 %f<51>;
	.reg .b32 %r<81>;
	.reg .b64 %rd<13>;


	// %r2 = launch index (x component); used below as the element index.
	// begin inline asm
	call (%r2), _optix_get_launch_index_x, ();
	// end inline asm
	// %rd6 = address of element %r2 in the params+0 array (12-byte stride).
	ld.const.u64 %rd3, [params];
	cvta.to.global.u64 %rd4, %rd3;
	mul.wide.u32 %rd5, %r2, 12;
	add.s64 %rd6, %rd4, %rd5;
	// (%f11, %f12, %f13) = first float triple.
	ld.global.f32 %f11, [%rd6];
	ld.global.f32 %f12, [%rd6+4];
	ld.global.f32 %f13, [%rd6+8];
	// %rd1 = address of element %r2 in the params+8 array (same byte offset).
	ld.const.u64 %rd7, [params+8];
	cvta.to.global.u64 %rd8, %rd7;
	add.s64 %rd1, %rd8, %rd5;
	// (%f14, %f15, %f16) = second float triple.
	ld.global.f32 %f14, [%rd1];
	ld.global.f32 %f15, [%rd1+4];
	ld.global.f32 %f16, [%rd1+8];
	// %rd2 = 64-bit value at params+24, passed as the second trace argument.
	ld.const.u64 %rd2, [params+24];
	// Constant operands for the trace call: %f18 = 0f5A0E1BCA (about 1e16),
	// %f19 = 0.0, %r41 = 255, %r44 = 1, %r46 = 2, %r78 = 0.
	mov.f32 %f18, 0f5A0E1BCA;
	mov.f32 %f19, 0f00000000;
	mov.u32 %r41, 255;
	mov.u32 %r44, 1;
	mov.u32 %r46, 2;
	mov.u32 %r78, 0;
	// Trace call: 32 b32 outputs (%r8..%r39), 49 positional inputs.
	// NOTE(review): the roles below follow the optixTrace wrapper in the
	// OptiX 7.x device headers -- confirm against the SDK version in use:
	//   %r78 (0) payload type, %rd2 handle, %f11..%f13 origin,
	//   %f14..%f16 direction, %f19 tmin, %f18 tmax, %f19 ray time,
	//   %r41 visibility mask, %r44 ray flags, %r78 SBT offset, %r44 SBT stride,
	//   %r78 miss SBT index, %r46 payload count, then 32 payload inputs.
	// NOTE(review): %r79 and %r80 (the first two payload inputs) are never
	// written in this function before being passed here. Only outputs %r8 and
	// %r9 are read afterwards.
	// begin inline asm
	call(%r8,%r9,%r10,%r11,%r12,%r13,%r14,%r15,%r16,%r17,%r18,%r19,%r20,%r21,%r22,%r23,%r24,%r25,%r26,%r27,%r28,%r29,%r30,%r31,%r32,%r33,%r34,%r35,%r36,%r37,%r38,%r39),_optix_trace_typed_32,(%r78,%rd2,%f11,%f12,%f13,%f14,%f15,%f16,%f19,%f18,%f19,%r41,%r44,%r78,%r44,%r78,%r46,%r79,%r80,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78);
	// end inline asm
	// %f20 = second returned value reinterpreted bit-for-bit as f32.
	mov.b32 %f20, %r9;
	// Overwrite the first triple with triple0 + triple1 * %f20, one fused
	// multiply-add per component. This happens before the -1 check, so it runs
	// whether or not the branch to LBB0_4 is taken.
	fma.rn.f32 %f21, %f14, %f20, %f11;
	st.global.f32 [%rd6], %f21;
	fma.rn.f32 %f22, %f15, %f20, %f12;
	st.global.f32 [%rd6+4], %f22;
	fma.rn.f32 %f23, %f16, %f20, %f13;
	st.global.f32 [%rd6+8], %f23;
	// First returned value == -1: nothing more to do (this is the value
	// __miss__ms stores through _optix_set_payload with index 0).
	setp.eq.s32 %p1, %r8, -1;
	@%p1 bra LBB0_4;

	// %rd12 = address of record %r8 in the params+16 array (36-byte stride).
	// The record is read as three float triples: v0 at +0, v1 at +12, v2 at +24.
	ld.const.u64 %rd9, [params+16];
	cvta.to.global.u64 %rd10, %rd9;
	mul.wide.u32 %rd11, %r8, 36;
	add.s64 %rd12, %rd10, %rd11;
	// %f26 = v1.y - v0.y
	ld.global.f32 %f24, [%rd12+4];
	ld.global.f32 %f25, [%rd12+16];
	sub.f32 %f26, %f25, %f24;
	// %f29 = v2.z - v0.z
	ld.global.f32 %f27, [%rd12+8];
	ld.global.f32 %f28, [%rd12+32];
	sub.f32 %f29, %f28, %f27;
	mul.f32 %f30, %f26, %f29;
	// %f32 = v1.z - v0.z
	ld.global.f32 %f31, [%rd12+20];
	sub.f32 %f32, %f31, %f27;
	// %f34 = v2.y - v0.y
	ld.global.f32 %f33, [%rd12+28];
	sub.f32 %f34, %f33, %f24;
	mul.f32 %f35, %f32, %f34;
	// %f49 = x component of cross(v1 - v0, v2 - v0)
	sub.f32 %f49, %f30, %f35;
	// %f38 = v2.x - v0.x
	ld.global.f32 %f36, [%rd12];
	ld.global.f32 %f37, [%rd12+24];
	sub.f32 %f38, %f37, %f36;
	mul.f32 %f39, %f32, %f38;
	// %f41 = v1.x - v0.x
	ld.global.f32 %f40, [%rd12+12];
	sub.f32 %f41, %f40, %f36;
	mul.f32 %f42, %f41, %f29;
	// %f48 = y component of the cross product
	sub.f32 %f48, %f39, %f42;
	mul.f32 %f43, %f41, %f34;
	mul.f32 %f44, %f26, %f38;
	// %f50 = z component of the cross product
	sub.f32 %f50, %f43, %f44;
	// %f4 = squared length: z*z, then fma with y*y, then fma with x*x.
	mul.f32 %f45, %f50, %f50;
	fma.rn.f32 %f46, %f48, %f48, %f45;
	fma.rn.f32 %f4, %f49, %f49, %f46;
	// leu is "less-or-equal OR unordered": skip normalisation when the squared
	// length is <= 0 or NaN, which also avoids dividing by zero below.
	setp.leu.f32 %p2, %f4, 0f00000000;
	@%p2 bra LBB0_3;

	// Normalise: divide each component by sqrt(squared length).
	sqrt.rn.f32 %f47, %f4;
	div.rn.f32 %f49, %f49, %f47;
	div.rn.f32 %f48, %f48, %f47;
	div.rn.f32 %f50, %f50, %f47;

	LBB0_3:
	// Overwrite the second triple (params+8 array) with (x, y, z) of the
	// possibly normalised cross product.
	st.global.f32 [%rd1], %f49;
	st.global.f32 [%rd1+4], %f48;
	st.global.f32 [%rd1+8], %f50;

	LBB0_4:
	ret;

	}
	// .globl __miss__ms
	// Miss entry point.
	//
	// Writes two payload values through _optix_set_payload:
	//   index 0 <- -1 (the value __raygen__rg tests for before reading a
	//              36-byte record)
	//   index 1 <- raw bits of the f32 returned by _optix_get_ray_tmax
	.visible .entry __miss__ms()
	{
	.reg .f32 %ray_tmax;
	.reg .b32 %slot_id, %slot_t, %no_hit, %tmax_bits;

	// Payload indices used by the two writes below.
	mov.u32 %slot_id, 0;
	mov.u32 %slot_t, 1;

	// Payload 0: -1.
	mov.u32 %no_hit, -1;
	call _optix_set_payload, (%slot_id, %no_hit);

	// Payload 1: ray tmax, moved bit-for-bit from f32 into a b32 register.
	call (%ray_tmax), _optix_get_ray_tmax, ();
	mov.b32 %tmax_bits, %ray_tmax;
	call _optix_set_payload, (%slot_t, %tmax_bits);
	ret;

	}
	// .globl __closesthit__ch
	// Closest-hit entry point.
	//
	// Writes two payload values through _optix_set_payload:
	//   index 0 <- value returned by _optix_read_primitive_idx
	//   index 1 <- raw bits of the f32 returned by _optix_get_ray_tmax
	//              (presumably the hit distance in a closest-hit program --
	//              confirm against the OptiX documentation)
	.visible .entry __closesthit__ch()
	{
	.reg .f32 %ray_tmax;
	.reg .b32 %slot_id, %slot_t, %prim_idx, %tmax_bits;

	// Payload indices used by the two writes below.
	mov.u32 %slot_id, 0;
	mov.u32 %slot_t, 1;

	// Payload 0: primitive index.
	call (%prim_idx), _optix_read_primitive_idx, ();
	call _optix_set_payload, (%slot_id, %prim_idx);

	// Payload 1: ray tmax, moved bit-for-bit from f32 into a b32 register.
	call (%ray_tmax), _optix_get_ray_tmax, ();
	mov.b32 %tmax_bits, %ray_tmax;
	call _optix_set_payload, (%slot_t, %tmax_bits);
	ret;

	}