| // | |
| // Generated by NVIDIA NVVM Compiler | |
| // | |
| // Compiler Build ID: CL-29618528 | |
| // Cuda compilation tools, release 11.2, V11.2.152 | |
| // Based on NVVM 7.0.1 | |
| // | |
| .version 7.2 | |
| .target sm_52 | |
| .address_size 64 | |
| // .globl __raygen__rg | |
// Launch parameter block in constant memory: 32 bytes, 8-byte aligned.
// __raygen__rg in this file reads it as four 64-bit values at byte offsets
// 0, 8, 16 and 24.
| .const .align 8 .b8 params[32]; | |
// Two function-local statics named MAGIC_SIGMOID_FACTOR (Itanium-mangled:
// tcnn::gaussian_cdf_approx(float, float) and
// tcnn::gaussian_cdf_approx_derivative(float, float)). Both hold the same
// IEEE-754 single 0f3F4ABDDD (about 0.792). Neither is referenced by the entry
// points visible in this listing.
| .global .align 4 .f32 _ZZN4tcnn19gaussian_cdf_approxEffE20MAGIC_SIGMOID_FACTOR = 0f3F4ABDDD; | |
| .global .align 4 .f32 _ZZN4tcnn30gaussian_cdf_approx_derivativeEffE20MAGIC_SIGMOID_FACTOR = 0f3F4ABDDD; | |
// Function-local static `directions` of ngp::sobol(unsigned int, unsigned int)
// (per the Itanium-mangled name): 640 bytes, 4-byte aligned, emitted as raw
// bytes. Read as little-endian 32-bit words, the first 32 entries are
// 1<<31, 1<<30, ..., 1<<0.
// NOTE(review): 640 bytes = 160 words, presumably 5 rows of 32 direction
// numbers - confirm against the C++ source.
// Not referenced by the entry points visible in this listing.
| .global .align 4 .b8 _ZZN3ngp5sobolEjjE10directions[640] = {0, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 160, 0, 0, 0, 240, 0, 0, 0, 136, 0, 0, 0, 204, 0, 0, 0, 170, 0, 0, 0, 255, 0, 0, 128, 128, 0, 0, 192, 192, 0, 0, 160, 160, 0, 0, 240, 240, 0, 0, 136, 136, 0, 0, 204, 204, 0, 0, 170, 170, 0, 0, 255, 255, 0, 128, 0, 128, 0, 192, 0, 192, 0, 160, 0, 160, 0, 240, 0, 240, 0, 136, 0, 136, 0, 204, 0, 204, 0, 170, 0, 170, 0, 255, 0, 255, 128, 128, 128, 128, 192, 192, 192, 192, 160, 160, 160, 160, 240, 240, 240, 240, 136, 136, 136, 136, 204, 204, 204, 204, 170, 170, 170, 170, 255, 255, 255, 255, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 96, 0, 0, 0, 144, 0, 0, 0, 232, 0, 0, 0, 92, 0, 0, 0, 142, 0, 0, 0, 197, 0, 0, 128, 104, 0, 0, 192, 156, 0, 0, 96, 238, 0, 0, 144, 85, 0, 0, 104, 128, 0, 0, 156, 192, 0, 0, 238, 96, 0, 0, 85, 144, 0, 128, 128, 232, 0, 192, 192, 92, 0, 96, 96, 142, 0, 144, 144, 197, 0, 232, 104, 104, 0, 92, 156, 156, 0, 142, 238, 238, 0, 197, 85, 85, 128, 232, 0, 128, 192, 92, 0, 192, 96, 142, 0, 96, 144, 197, 0, 144, 104, 104, 0, 232, 156, 156, 0, 92, 238, 238, 0, 142, 85, 85, 0, 197, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 32, 0, 0, 0, 80, 0, 0, 0, 248, 0, 0, 0, 116, 0, 0, 0, 162, 0, 0, 0, 147, 0, 0, 128, 216, 0, 0, 64, 37, 0, 0, 224, 89, 0, 0, 208, 230, 0, 0, 8, 120, 0, 0, 12, 180, 0, 0, 2, 130, 0, 0, 5, 195, 0, 128, 143, 32, 0, 64, 71, 81, 0, 32, 234, 251, 0, 48, 217, 117, 0, 136, 133, 160, 0, 84, 78, 145, 0, 158, 231, 219, 0, 109, 219, 37, 128, 0, 128, 88, 192, 0, 64, 229, 32, 0, 224, 121, 80, 0, 208, 182, 248, 0, 8, 128, 116, 0, 12, 192, 162, 0, 2, 32, 147, 0, 5, 80, 
0, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 176, 0, 0, 0, 248, 0, 0, 0, 220, 0, 0, 0, 122, 0, 0, 0, 157, 0, 0, 128, 90, 0, 0, 192, 47, 0, 0, 96, 161, 0, 0, 176, 240, 0, 0, 136, 218, 0, 0, 196, 111, 0, 0, 98, 129, 0, 0, 187, 64, 0, 128, 135, 34, 0, 192, 201, 179, 0, 160, 101, 251, 0, 208, 178, 221, 0, 40, 2, 120, 0, 60, 11, 156, 0, 182, 15, 90, 0, 219, 13, 45, 128, 128, 135, 162, 64, 192, 201, 243, 32, 160, 101, 219, 176, 208, 178, 109, 248, 40, 2, 128, 220, 60, 11, 64, 122, 182, 15, 32, 157, 219, 13, 176}; | |
// __raygen__rg: OptiX ray-generation entry point.
// For the ray selected by launch index x it:
//   1. loads three floats (origin) from the array pointed to by [params+0]
//      and three floats (direction) from the array pointed to by [params+8],
//      both indexed with a 12-byte stride;
//   2. issues a trace call, passing the 64-bit value at [params+24] as the
//      handle argument;
//   3. overwrites the origin entry with origin + t * direction, where t is
//      the second value returned by the trace, reinterpreted as f32;
//   4. if the first returned value is -1, returns without touching the
//      direction entry; otherwise uses that value as an index into 36-byte
//      records at [params+16] (nine floats, presumably three xyz vertices
//      v0, v1, v2), computes n = (v1 - v0) x (v2 - v0), divides n by its
//      length when the squared length is > 0, and overwrites the direction
//      entry with n.
| .visible .entry __raygen__rg() | |
| { | |
| .reg .pred %p<3>; | |
| .reg .f32 %f<51>; | |
| .reg .b32 %r<81>; | |
| .reg .b64 %rd<13>; | |
// %r2 = launch index x; it selects the ray handled by this invocation.
| // begin inline asm | |
| call (%r2), _optix_get_launch_index_x, (); | |
| // end inline asm | |
// %rd6 = address of origin entry (%r2 * 12 bytes); %f11..%f13 = origin x, y, z.
| ld.const.u64 %rd3, [params]; | |
| cvta.to.global.u64 %rd4, %rd3; | |
| mul.wide.u32 %rd5, %r2, 12; | |
| add.s64 %rd6, %rd4, %rd5; | |
| ld.global.f32 %f11, [%rd6]; | |
| ld.global.f32 %f12, [%rd6+4]; | |
| ld.global.f32 %f13, [%rd6+8]; | |
// %rd1 = address of direction entry (same byte offset); %f14..%f16 = direction x, y, z.
| ld.const.u64 %rd7, [params+8]; | |
| cvta.to.global.u64 %rd8, %rd7; | |
| add.s64 %rd1, %rd8, %rd5; | |
| ld.global.f32 %f14, [%rd1]; | |
| ld.global.f32 %f15, [%rd1+4]; | |
| ld.global.f32 %f16, [%rd1+8]; | |
// %rd2 = 64-bit value at params+24, used as the second trace operand.
| ld.const.u64 %rd2, [params+24]; | |
// Constants for the trace call: %f18 = 0f5A0E1BCA (about 1e16), %f19 = 0.0,
// %r41 = 255, %r44 = 1, %r46 = 2, %r78 = 0.
| mov.f32 %f18, 0f5A0E1BCA; | |
| mov.f32 %f19, 0f00000000; | |
| mov.u32 %r41, 255; | |
| mov.u32 %r44, 1; | |
| mov.u32 %r46, 2; | |
| mov.u32 %r78, 0; | |
// Trace call. Operands in order: 0, handle, origin xyz, direction xyz,
// 0.0, ~1e16, 0.0, 255, 1, 0, 1, 0, 2, %r79, %r80, then %r78 (0) repeated.
// NOTE(review): in the OptiX 7.x device header these positions are payload
// type, traversable handle, origin, direction, tmin, tmax, ray time,
// visibility mask, ray flags, SBT offset, SBT stride, miss SBT index,
// payload count, then payload values - confirm against the SDK in use.
// %r79 and %r80 are never assigned in this function, so their input values
// carry no meaning; __miss__ms and __closesthit__ch in this file both write
// payload indices 0 and 1. Only outputs %r8 and %r9 are read afterwards.
| // begin inline asm | |
| call(%r8,%r9,%r10,%r11,%r12,%r13,%r14,%r15,%r16,%r17,%r18,%r19,%r20,%r21,%r22,%r23,%r24,%r25,%r26,%r27,%r28,%r29,%r30,%r31,%r32,%r33,%r34,%r35,%r36,%r37,%r38,%r39),_optix_trace_typed_32,(%r78,%rd2,%f11,%f12,%f13,%f14,%f15,%f16,%f19,%f18,%f19,%r41,%r44,%r78,%r44,%r78,%r46,%r79,%r80,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78,%r78); | |
| // end inline asm | |
// t = bits of %r9 reinterpreted as f32; origin entry <- origin + t * direction.
// This store happens on both the hit and the miss path.
| mov.b32 %f20, %r9; | |
| fma.rn.f32 %f21, %f14, %f20, %f11; | |
| st.global.f32 [%rd6], %f21; | |
| fma.rn.f32 %f22, %f15, %f20, %f12; | |
| st.global.f32 [%rd6+4], %f22; | |
| fma.rn.f32 %f23, %f16, %f20, %f13; | |
| st.global.f32 [%rd6+8], %f23; | |
// -1 is the value __miss__ms in this file stores at payload index 0:
// nothing was hit, so the direction entry is left unchanged.
| setp.eq.s32 %p1, %r8, -1; | |
| @%p1 bra LBB0_4; | |
// %rd12 = address of record %r8 in the array at [params+16], 36 bytes each.
// Offsets 0/4/8 = v0.xyz, 12/16/20 = v1.xyz, 24/28/32 = v2.xyz (naming assumed).
// With e1 = v1 - v0 and e2 = v2 - v0:
| ld.const.u64 %rd9, [params+16]; | |
| cvta.to.global.u64 %rd10, %rd9; | |
| mul.wide.u32 %rd11, %r8, 36; | |
| add.s64 %rd12, %rd10, %rd11; | |
// %f26 = e1.y, %f29 = e2.z
| ld.global.f32 %f24, [%rd12+4]; | |
| ld.global.f32 %f25, [%rd12+16]; | |
| sub.f32 %f26, %f25, %f24; | |
| ld.global.f32 %f27, [%rd12+8]; | |
| ld.global.f32 %f28, [%rd12+32]; | |
| sub.f32 %f29, %f28, %f27; | |
| mul.f32 %f30, %f26, %f29; | |
// %f32 = e1.z, %f34 = e2.y
| ld.global.f32 %f31, [%rd12+20]; | |
| sub.f32 %f32, %f31, %f27; | |
| ld.global.f32 %f33, [%rd12+28]; | |
| sub.f32 %f34, %f33, %f24; | |
| mul.f32 %f35, %f32, %f34; | |
// %f49 = n.x = e1.y * e2.z - e1.z * e2.y
| sub.f32 %f49, %f30, %f35; | |
// %f38 = e2.x, %f41 = e1.x
| ld.global.f32 %f36, [%rd12]; | |
| ld.global.f32 %f37, [%rd12+24]; | |
| sub.f32 %f38, %f37, %f36; | |
| mul.f32 %f39, %f32, %f38; | |
| ld.global.f32 %f40, [%rd12+12]; | |
| sub.f32 %f41, %f40, %f36; | |
| mul.f32 %f42, %f41, %f29; | |
// %f48 = n.y = e1.z * e2.x - e1.x * e2.z
| sub.f32 %f48, %f39, %f42; | |
| mul.f32 %f43, %f41, %f34; | |
| mul.f32 %f44, %f26, %f38; | |
// %f50 = n.z = e1.x * e2.y - e1.y * e2.x
| sub.f32 %f50, %f43, %f44; | |
// %f4 = n.x^2 + n.y^2 + n.z^2 (squared length).
| mul.f32 %f45, %f50, %f50; | |
| fma.rn.f32 %f46, %f48, %f48, %f45; | |
| fma.rn.f32 %f4, %f49, %f49, %f46; | |
// leu is "less-or-equal or unordered": skip the division when the squared
// length is <= 0 or NaN, and store n unnormalized.
| setp.leu.f32 %p2, %f4, 0f00000000; | |
| @%p2 bra LBB0_3; | |
| sqrt.rn.f32 %f47, %f4; | |
| div.rn.f32 %f49, %f49, %f47; | |
| div.rn.f32 %f48, %f48, %f47; | |
| div.rn.f32 %f50, %f50, %f47; | |
// Overwrite the direction entry with n (x, y, z).
| LBB0_3: | |
| st.global.f32 [%rd1], %f49; | |
| st.global.f32 [%rd1+4], %f48; | |
| st.global.f32 [%rd1+8], %f50; | |
| LBB0_4: | |
| ret; | |
| } | |
| // .globl __miss__ms | |
// __miss__ms: OptiX miss entry point.
// Stores -1 at payload index 0 and the raw bits of the ray's tmax (f32) at
// payload index 1. __raygen__rg in this file tests its first trace result
// against -1 and reinterprets its second trace result as a float.
| .visible .entry __miss__ms() | |
| { | |
| .reg .f32 %f<2>; | |
| .reg .b32 %r<5>; | |
// payload[0] = -1
| mov.u32 %r1, 0; | |
| mov.u32 %r2, -1; | |
| // begin inline asm | |
| call _optix_set_payload, (%r1, %r2); | |
| // end inline asm | |
// %f1 = ray tmax as reported by the intrinsic.
| // begin inline asm | |
| call (%f1), _optix_get_ray_tmax, (); | |
| // end inline asm | |
// payload[1] = bit pattern of %f1 (mov.b32 is a reinterpretation, not a conversion).
| mov.b32 %r4, %f1; | |
| mov.u32 %r3, 1; | |
| // begin inline asm | |
| call _optix_set_payload, (%r3, %r4); | |
| // end inline asm | |
| ret; | |
| } | |
| // .globl __closesthit__ch | |
// __closesthit__ch: OptiX closest-hit entry point.
// Stores the primitive index at payload index 0 and the raw bits of the
// ray's tmax (f32) at payload index 1.
// NOTE(review): inside a closest-hit program the OptiX docs describe ray tmax
// as the distance of the accepted hit - confirm for the SDK version in use.
| .visible .entry __closesthit__ch() | |
| { | |
| .reg .f32 %f<2>; | |
| .reg .b32 %r<6>; | |
// %r1 = index of the primitive that was hit.
| // begin inline asm | |
| call (%r1), _optix_read_primitive_idx, (); | |
| // end inline asm | |
// payload[0] = %r1
| mov.u32 %r2, 0; | |
| // begin inline asm | |
| call _optix_set_payload, (%r2, %r1); | |
| // end inline asm | |
// %f1 = ray tmax as reported by the intrinsic.
| // begin inline asm | |
| call (%f1), _optix_get_ray_tmax, (); | |
| // end inline asm | |
// payload[1] = bit pattern of %f1 (mov.b32 is a reinterpretation, not a conversion).
| mov.b32 %r5, %f1; | |
| mov.u32 %r4, 1; | |
| // begin inline asm | |
| call _optix_set_payload, (%r4, %r5); | |
| // end inline asm | |
| ret; | |
| } | |