| // | |
| // Generated by NVIDIA NVVM Compiler | |
| // | |
| // Compiler Build ID: CL-31833905 | |
| // Cuda compilation tools, release 11.8, V11.8.89 | |
| // Based on NVVM 7.0.1 | |
| // | |
| .version 7.8 | |
| .target sm_52 | |
| .address_size 64 | |
| // .globl __raygen__rg | |
| .const .align 8 .b8 params[24]; | |
| .global .align 4 .f32 _ZZN4tcnn19gaussian_cdf_approxEffE20MAGIC_SIGMOID_FACTOR = 0f3F4ABDDD; | |
| .global .align 4 .f32 _ZZN4tcnn30gaussian_cdf_approx_derivativeEffE20MAGIC_SIGMOID_FACTOR = 0f3F4ABDDD; | |
| .global .align 4 .b8 _ZZN3ngp5sobolEjjE10directions[640] = {0, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 160, 0, 0, 0, 240, 0, 0, 0, 136, 0, 0, 0, 204, 0, 0, 0, 170, 0, 0, 0, 255, 0, 0, 128, 128, 0, 0, 192, 192, 0, 0, 160, 160, 0, 0, 240, 240, 0, 0, 136, 136, 0, 0, 204, 204, 0, 0, 170, 170, 0, 0, 255, 255, 0, 128, 0, 128, 0, 192, 0, 192, 0, 160, 0, 160, 0, 240, 0, 240, 0, 136, 0, 136, 0, 204, 0, 204, 0, 170, 0, 170, 0, 255, 0, 255, 128, 128, 128, 128, 192, 192, 192, 192, 160, 160, 160, 160, 240, 240, 240, 240, 136, 136, 136, 136, 204, 204, 204, 204, 170, 170, 170, 170, 255, 255, 255, 255, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 96, 0, 0, 0, 144, 0, 0, 0, 232, 0, 0, 0, 92, 0, 0, 0, 142, 0, 0, 0, 197, 0, 0, 128, 104, 0, 0, 192, 156, 0, 0, 96, 238, 0, 0, 144, 85, 0, 0, 104, 128, 0, 0, 156, 192, 0, 0, 238, 96, 0, 0, 85, 144, 0, 128, 128, 232, 0, 192, 192, 92, 0, 96, 96, 142, 0, 144, 144, 197, 0, 232, 104, 104, 0, 92, 156, 156, 0, 142, 238, 238, 0, 197, 85, 85, 128, 232, 0, 128, 192, 92, 0, 192, 96, 142, 0, 96, 144, 197, 0, 144, 104, 104, 0, 232, 156, 156, 0, 92, 238, 238, 0, 142, 85, 85, 0, 197, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 32, 0, 0, 0, 80, 0, 0, 0, 248, 0, 0, 0, 116, 0, 0, 0, 162, 0, 0, 0, 147, 0, 0, 128, 216, 0, 0, 64, 37, 0, 0, 224, 89, 0, 0, 208, 230, 0, 0, 8, 120, 0, 0, 12, 180, 0, 0, 2, 130, 0, 0, 5, 195, 0, 128, 143, 32, 0, 64, 71, 81, 0, 32, 234, 251, 0, 48, 217, 117, 0, 136, 133, 160, 0, 84, 78, 145, 0, 158, 231, 219, 0, 109, 219, 37, 128, 0, 128, 88, 192, 0, 64, 229, 32, 0, 224, 121, 80, 0, 208, 182, 248, 0, 8, 128, 116, 0, 12, 192, 162, 0, 2, 32, 147, 0, 5, 80, 
0, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 176, 0, 0, 0, 248, 0, 0, 0, 220, 0, 0, 0, 122, 0, 0, 0, 157, 0, 0, 128, 90, 0, 0, 192, 47, 0, 0, 96, 161, 0, 0, 176, 240, 0, 0, 136, 218, 0, 0, 196, 111, 0, 0, 98, 129, 0, 0, 187, 64, 0, 128, 135, 34, 0, 192, 201, 179, 0, 160, 101, 251, 0, 208, 178, 221, 0, 40, 2, 120, 0, 60, 11, 156, 0, 182, 15, 90, 0, 219, 13, 45, 128, 128, 135, 162, 64, 192, 201, 243, 32, 160, 101, 219, 176, 208, 178, 109, 248, 40, 2, 128, 220, 60, 11, 64, 122, 182, 15, 32, 157, 219, 13, 176}; | |
| .global .align 4 .u32 _ZZ12__raygen__rgE11N_STAB_RAYS = 32; | |
| .global .align 4 .f32 _ZZN3ngp13fibonacci_dirILj32EEEN3glm3vecILi3EfLNS1_9qualifierE0EEEjRKNS2_ILi2EfLS3_0EEEE12GOLDEN_RATIO = 0f3FCF1BBD; | |
| .global .align 4 .b8 __cudart_i2opi_f[24] = {65, 144, 67, 60, 153, 149, 98, 219, 192, 221, 52, 245, 209, 87, 39, 252, 41, 21, 68, 78, 110, 131, 249, 162}; | |
// __raygen__rg: OptiX ray-generation program.
// For launch index i it loads a float3 from ((float3*)params[0])[i], then
// fires up to 32 (= N_STAB_RAYS) "stab" rays from that point in directions
// built from golden-ratio (fibonacci-sphere style) sequences offset by two
// per-index pseudo-random uniforms. If any ray misses (payload 0, set by
// __miss__ms) it returns immediately; if all 32 rays hit (payload 1, set by
// __closesthit__ch) it negates the float at ((float*)params[8])[i].
// NOTE(review): this reads like an inside/outside stab test that flips the
// sign of a stored distance/density for interior points -- confirm against
// the host code that fills `params`.
| .visible .entry __raygen__rg() | |
| { | |
// 28 bytes of local scratch: seven u32 words, used only by the
// Payne-Hanek reduction loop at $L__BB0_8.
| .local .align 4 .b8 __local_depot0[28]; | |
| .reg .b64 %SP; | |
| .reg .b64 %SPL; | |
| .reg .pred %p<16>; | |
| .reg .f32 %f<75>; | |
| .reg .b32 %r<151>; | |
| .reg .f64 %fd<3>; | |
| .reg .b64 %rd<76>; | |
| mov.u64 %SPL, __local_depot0; | |
| add.u64 %rd1, %SPL, 0; | |
| // begin inline asm | |
// %r23 = optixGetLaunchIndex().x
| call (%r23), _optix_get_launch_index_x, (); | |
| // end inline asm | |
| mov.u64 %rd67, 0; | |
// Load the query position: three consecutive floats at byte offset
// launch_index * 12 from the pointer stored at [params].
| ld.const.u64 %rd27, [params]; | |
| cvta.to.global.u64 %rd28, %rd27; | |
| cvt.u64.u32 %rd2, %r23; | |
| mul.wide.u32 %rd29, %r23, 12; | |
| add.s64 %rd30, %rd28, %rd29; | |
| ld.global.f32 %f1, [%rd30]; | |
| ld.global.f32 %f2, [%rd30+4]; | |
| ld.global.f32 %f3, [%rd30+8]; | |
// Hash 2*launch_index into a 64-bit RNG state. The bit loop below is an
// LCG skip-ahead (square-and-multiply over the multiplier/increment pair);
// 6364136223846793005 is the well-known Knuth/PCG LCG multiplier, and the
// output step at $L__BB0_4 (xor-shift 18 -> shift 27, rotated by the top
// 5 bits) matches the PCG XSH-RR output function. Presumably a PCG-style
// per-index seed -- TODO confirm against the device source.
| shl.b32 %r1, %r23, 1; | |
| setp.eq.s32 %p1, %r1, 0; | |
| mov.u64 %rd71, -8846114313915602277; | |
| @%p1 bra $L__BB0_4; | |
| cvt.u64.u32 %rd66, %r1; | |
| mov.u64 %rd70, 1; | |
| mov.u64 %rd69, -2720673578348880933; | |
| mov.u64 %rd68, 6364136223846793005; | |
// Skip-ahead loop: one iteration per bit of %rd66 (= 2*launch_index),
// conditionally folding the current multiplier/increment into the
// accumulated state (%rd67) and multiplier product (%rd70), then squaring.
| $L__BB0_2: | |
| and.b64 %rd35, %rd66, 1; | |
| setp.eq.b64 %p2, %rd35, 1; | |
| mul.lo.s64 %rd36, %rd67, %rd68; | |
| add.s64 %rd37, %rd36, %rd69; | |
| selp.b64 %rd38, %rd68, 1, %p2; | |
| mul.lo.s64 %rd70, %rd38, %rd70; | |
| selp.b64 %rd67, %rd37, %rd67, %p2; | |
| add.s64 %rd39, %rd68, 1; | |
| mul.lo.s64 %rd69, %rd39, %rd69; | |
| mul.lo.s64 %rd68, %rd68, %rd68; | |
| shr.u64 %rd66, %rd66, 1; | |
| setp.ne.s64 %p3, %rd66, 0; | |
| @%p3 bra $L__BB0_2; | |
| mul.lo.s64 %rd71, %rd70, -8846114313915602277; | |
// Derive two 32-bit outputs from consecutive states and turn each into a
// uniform float in [0,1): take 23 random bits, OR in 1.0f's exponent
// (1065353216 = 0x3F800000) to get a value in [1,2), subtract 1.0f
// (0fBF800000 = -1.0f). %f4 and %f5 are the per-index random offsets
// reused for every sample below.
| $L__BB0_4: | |
| add.s64 %rd40, %rd67, %rd71; | |
| mul.lo.s64 %rd41, %rd40, 6364136223846793005; | |
| add.s64 %rd42, %rd41, -2720673578348880933; | |
| shr.u64 %rd43, %rd40, 18; | |
| xor.b64 %rd44, %rd43, %rd40; | |
| shr.u64 %rd45, %rd44, 27; | |
| cvt.u32.u64 %r31, %rd45; | |
| shr.u64 %rd46, %rd40, 59; | |
| cvt.u32.u64 %r32, %rd46; | |
| mov.u32 %r145, 0; | |
| shf.r.wrap.b32 %r33, %r31, %r31, %r32; | |
| shr.u32 %r34, %r33, 9; | |
| or.b32 %r35, %r34, 1065353216; | |
| mov.b32 %f13, %r35; | |
| add.f32 %f4, %f13, 0fBF800000; | |
| shr.u64 %rd47, %rd42, 18; | |
| xor.b64 %rd48, %rd47, %rd42; | |
| shr.u64 %rd49, %rd48, 27; | |
| cvt.u32.u64 %r36, %rd49; | |
| shr.u64 %rd50, %rd42, 59; | |
| cvt.u32.u64 %r37, %rd50; | |
| shf.r.wrap.b32 %r38, %r36, %r36, %r37; | |
| shr.u32 %r39, %r38, 9; | |
| or.b32 %r40, %r39, 1065353216; | |
| mov.b32 %f14, %r40; | |
| add.f32 %f5, %f14, 0fBF800000; | |
// Third launch parameter -- passed below as the acceleration-structure
// operand of _optix_trace_typed_32 (presumably the traversable handle).
| ld.const.u64 %rd17, [params+16]; | |
| mov.u64 %rd51, __cudart_i2opi_f; | |
// ---- Per-sample loop; %r145 counts stab rays 0..31. ----
// Azimuth: %f7 = 2*pi * (frac(%f5 + i/phi) - 0.5), with
// phi = 0f3FCF1BBD ~= 1.618 (cf. GOLDEN_RATIO global),
// 0fBF000000 = -0.5, 0f40C90FDB = 2*pi.
| $L__BB0_5: | |
| cvt.rn.f32.u32 %f6, %r145; | |
| div.rn.f32 %f15, %f6, 0f3FCF1BBD; | |
| add.f32 %f16, %f5, %f15; | |
| cvt.rmi.f32.f32 %f17, %f16; | |
| sub.f32 %f18, %f16, %f17; | |
| add.f32 %f19, %f18, 0fBF000000; | |
| mul.f32 %f7, %f19, 0f40C90FDB; | |
// Fast-path trig argument reduction (Cody-Waite): quadrant
// %r150 = round(%f7 * 2/pi) (0f3F22F983 = 2/pi), then subtract
// %r150 * pi/2 in three FMA steps using a split of pi/2
// (0fBFC90FDA, 0fB3A22168, 0fA7C234C5). %f74 = reduced argument.
| mul.f32 %f20, %f7, 0f3F22F983; | |
| cvt.rni.s32.f32 %r150, %f20; | |
| cvt.rn.f32.s32 %f21, %r150; | |
| mov.f32 %f22, 0fBFC90FDA; | |
| fma.rn.f32 %f23, %f21, %f22, %f7; | |
| mov.f32 %f24, 0fB3A22168; | |
| fma.rn.f32 %f25, %f21, %f24, %f23; | |
| mov.f32 %f26, 0fA7C234C5; | |
| fma.rn.f32 %f74, %f21, %f26, %f25; | |
// |arg| < 0f47CE4780 (~1.06e5): fast path is accurate enough.
// Exactly +/-inf: produce NaN via inf * 0. Otherwise: Payne-Hanek.
| abs.f32 %f9, %f7; | |
| setp.ltu.f32 %p4, %f9, 0f47CE4780; | |
| @%p4 bra $L__BB0_13; | |
| setp.eq.f32 %p5, %f9, 0f7F800000; | |
| @%p5 bra $L__BB0_12; | |
| bra.uni $L__BB0_7; | |
| $L__BB0_12: | |
| mov.f32 %f29, 0f00000000; | |
| mul.rn.f32 %f74, %f7, %f29; | |
| mov.u32 %r150, 0; | |
| bra.uni $L__BB0_13; | |
// ---- Payne-Hanek reduction for huge arguments. ----
// Split %f7 into exponent (%r6) and implicit-leading-one mantissa (%r7),
// then multiply the mantissa by the 192 precomputed bits of 2/pi
// (__cudart_i2opi_f), accumulating the long product into the 7-word
// local scratch buffer.
| $L__BB0_7: | |
| mov.b32 %r5, %f7; | |
| bfe.u32 %r42, %r5, 23, 8; | |
| add.s32 %r6, %r42, -128; | |
| shl.b32 %r43, %r5, 8; | |
| or.b32 %r7, %r43, -2147483648; | |
| shr.u32 %r8, %r6, 5; | |
| mov.u64 %rd75, 0; | |
| mov.u32 %r147, 0; | |
| mov.u64 %rd73, %rd51; | |
| mov.u64 %rd74, %rd1; | |
| $L__BB0_8: | |
| .pragma "nounroll"; | |
| ld.global.nc.u32 %r44, [%rd73]; | |
| mad.wide.u32 %rd53, %r44, %r7, %rd75; | |
| shr.u64 %rd75, %rd53, 32; | |
| st.local.u32 [%rd74], %rd53; | |
| add.s64 %rd74, %rd74, 4; | |
| add.s64 %rd73, %rd73, 4; | |
| add.s32 %r147, %r147, 1; | |
| setp.ne.s32 %p6, %r147, 6; | |
| @%p6 bra $L__BB0_8; | |
| st.local.u32 [%rd1+24], %rd75; | |
// Select the two 32-bit words of the product that straddle the binary
// point for this exponent, and (if the exponent is not word-aligned)
// funnel-shift a third word in to align them.
| mov.u32 %r45, 4; | |
| sub.s32 %r11, %r45, %r8; | |
| mov.u32 %r46, 6; | |
| sub.s32 %r47, %r46, %r8; | |
| mul.wide.s32 %rd54, %r47, 4; | |
| add.s64 %rd55, %rd1, %rd54; | |
| ld.local.u32 %r148, [%rd55]; | |
| ld.local.u32 %r149, [%rd55+-4]; | |
| and.b32 %r14, %r6, 31; | |
| setp.eq.s32 %p7, %r14, 0; | |
| @%p7 bra $L__BB0_11; | |
| mov.u32 %r48, 32; | |
| sub.s32 %r49, %r48, %r14; | |
| shr.u32 %r50, %r149, %r49; | |
| shl.b32 %r51, %r148, %r14; | |
| add.s32 %r148, %r50, %r51; | |
| mul.wide.s32 %rd56, %r11, 4; | |
| add.s64 %rd57, %rd1, %rd56; | |
| ld.local.u32 %r52, [%rd57]; | |
| shr.u32 %r53, %r52, %r49; | |
| shl.b32 %r54, %r149, %r14; | |
| add.s32 %r149, %r53, %r54; | |
// Extract the quadrant (top 2 bits, rounded) into %r150 with the sign of
// the original argument, rebuild the 64-bit fractional remainder, scale
// it by pi/2 * 2^-64 (0d3BF921FB54442D19) in double precision, and apply
// the final sign. %f74 = reduced argument for the polynomials below.
| $L__BB0_11: | |
| and.b32 %r55, %r5, -2147483648; | |
| shr.u32 %r56, %r149, 30; | |
| shl.b32 %r57, %r148, 2; | |
| or.b32 %r58, %r56, %r57; | |
| shr.u32 %r59, %r58, 31; | |
| shr.u32 %r60, %r148, 30; | |
| add.s32 %r61, %r59, %r60; | |
| neg.s32 %r62, %r61; | |
| setp.eq.s32 %p8, %r55, 0; | |
| selp.b32 %r150, %r61, %r62, %p8; | |
| setp.ne.s32 %p9, %r59, 0; | |
| xor.b32 %r63, %r55, -2147483648; | |
| selp.b32 %r64, %r63, %r55, %p9; | |
| selp.b32 %r65, -1, 0, %p9; | |
| xor.b32 %r66, %r58, %r65; | |
| shl.b32 %r67, %r149, 2; | |
| xor.b32 %r68, %r67, %r65; | |
| cvt.u64.u32 %rd58, %r66; | |
| cvt.u64.u32 %rd59, %r68; | |
| bfi.b64 %rd60, %rd58, %rd59, 32, 32; | |
| cvt.rn.f64.s64 %fd1, %rd60; | |
| mul.f64 %fd2, %fd1, 0d3BF921FB54442D19; | |
| cvt.rn.f32.f64 %f27, %fd2; | |
| setp.eq.s32 %p10, %r64, 0; | |
| neg.f32 %f28, %f27; | |
| selp.f32 %f74, %f27, %f28, %p10; | |
// ---- Build the sample direction. ----
// Elevation: t = frac(%f4 + (i + 1.33)/33.66) (constants 0f3FAA3D71 and
// 0f4206A3D7), z = 1 - 2t (%f35); sin of the polar angle =
// sqrt(max(1 - z^2, 0)) (%f49).
| $L__BB0_13: | |
| add.f32 %f39, %f6, 0f3FAA3D71; | |
| div.rn.f32 %f40, %f39, 0f4206A3D7; | |
| add.f32 %f41, %f4, %f40; | |
| cvt.rmi.f32.f32 %f42, %f41; | |
| sub.f32 %f43, %f41, %f42; | |
| add.f32 %f44, %f43, %f43; | |
| mov.f32 %f45, 0f3F800000; | |
| sub.f32 %f35, %f45, %f44; | |
| mul.f32 %f46, %f35, %f35; | |
| sub.f32 %f47, %f45, %f46; | |
| mov.f32 %f38, 0f00000000; | |
| max.f32 %f48, %f47, %f38; | |
| sqrt.rn.f32 %f49, %f48; | |
// sin/cos of the reduced azimuth via even (%f58) and odd (%f65) minimax
// polynomials in %f74^2, then the usual quadrant fix-up driven by the
// low two bits of %r150 (selects/negates to recover sin and cos of the
// full-range angle as %f71 and %f69).
| mul.f32 %f50, %f74, %f74; | |
| mov.f32 %f51, 0fBAB607ED; | |
| mov.f32 %f52, 0f37CBAC00; | |
| fma.rn.f32 %f53, %f52, %f50, %f51; | |
| mov.f32 %f54, 0f3D2AAABB; | |
| fma.rn.f32 %f55, %f53, %f50, %f54; | |
| mov.f32 %f56, 0fBEFFFFFF; | |
| fma.rn.f32 %f57, %f55, %f50, %f56; | |
| fma.rn.f32 %f58, %f57, %f50, %f45; | |
| fma.rn.f32 %f59, %f50, %f74, %f38; | |
| mov.f32 %f60, 0f3C0885E4; | |
| mov.f32 %f61, 0fB94D4153; | |
| fma.rn.f32 %f62, %f61, %f50, %f60; | |
| mov.f32 %f63, 0fBE2AAAA8; | |
| fma.rn.f32 %f64, %f62, %f50, %f63; | |
| fma.rn.f32 %f65, %f64, %f59, %f74; | |
| mov.u32 %r108, 1; | |
| and.b32 %r141, %r150, 1; | |
| setp.eq.b32 %p11, %r141, 1; | |
| mov.u32 %r140, 0; | |
| selp.f32 %f66, %f58, %f65, %p11; | |
| selp.f32 %f67, %f65, %f58, %p11; | |
| and.b32 %r142, %r150, 2; | |
| setp.eq.s32 %p12, %r142, 0; | |
| neg.f32 %f68, %f66; | |
| selp.f32 %f69, %f66, %f68, %p12; | |
| add.s32 %r143, %r150, 1; | |
| and.b32 %r144, %r143, 2; | |
| setp.eq.s32 %p13, %r144, 0; | |
| neg.f32 %f70, %f67; | |
| selp.f32 %f71, %f67, %f70, %p13; | |
// Direction = (%f49*%f71, %f49*%f69, %f35) -- x/y from sin/cos of the
// azimuth scaled by sin(polar), z from the elevation mapping above.
| mul.f32 %f33, %f49, %f71; | |
| mul.f32 %f34, %f49, %f69; | |
// optixTrace: handle %rd17, origin (%f1,%f2,%f3), direction
// (%f33,%f34,%f35), tmin 0, tmax 0f5A0E1BCA (~1e16), time 0,
// visibility mask 255, ray flags 5, SBT offset 0 / stride 1 / miss
// index 0, one payload register in/out (%r146, initialized to 0).
| mov.f32 %f37, 0f5A0E1BCA; | |
| mov.u32 %r103, 255; | |
| mov.u32 %r104, 5; | |
| // begin inline asm | |
| call(%r146,%r71,%r72,%r73,%r74,%r75,%r76,%r77,%r78,%r79,%r80,%r81,%r82,%r83,%r84,%r85,%r86,%r87,%r88,%r89,%r90,%r91,%r92,%r93,%r94,%r95,%r96,%r97,%r98,%r99,%r100,%r101),_optix_trace_typed_32,(%r140,%rd17,%f1,%f2,%f3,%f33,%f34,%f35,%f38,%f37,%f38,%r103,%r104,%r140,%r108,%r140,%r108,%r146,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140,%r140); | |
| // end inline asm | |
// Payload 0 => ray missed (see __miss__ms): exit without touching the
// output. Otherwise loop until 32 rays have been traced.
| setp.eq.s32 %p14, %r146, 0; | |
| add.s32 %r145, %r145, 1; | |
| @%p14 bra $L__BB0_16; | |
| setp.lt.u32 %p15, %r145, 32; | |
| @%p15 bra $L__BB0_5; | |
// All 32 stab rays hit: negate ((float*)params[8])[launch_index].
| ld.const.u64 %rd62, [params+8]; | |
| cvta.to.global.u64 %rd63, %rd62; | |
| shl.b64 %rd64, %rd2, 2; | |
| add.s64 %rd65, %rd63, %rd64; | |
| ld.global.f32 %f72, [%rd65]; | |
| neg.f32 %f73, %f72; | |
| st.global.f32 [%rd65], %f73; | |
| $L__BB0_16: | |
| ret; | |
| } | |
| // .globl __miss__ms | |
// Miss program: a stab ray escaped the scene without hitting anything.
// Records this by writing 0 into payload register 0, which __raygen__rg
// reads back after _optix_trace_typed_32 returns.
.visible .entry __miss__ms()
{
	.reg .b32 	%r<2>;

	// Payload slot index and stored value are both zero, so one register
	// serves as both operands of the intrinsic call.
	mov.u32 	%r1, 0;
	// begin inline asm
	call _optix_set_payload, (%r1, %r1);
	// end inline asm
	ret;
}
| // .globl __closesthit__ch | |
// Closest-hit program: a stab ray intersected scene geometry.
// Records the hit by writing 1 into payload register 0, signalling
// __raygen__rg to keep tracing (or, after 32 hits, to flip its output).
.visible .entry __closesthit__ch()
{
	.reg .b32 	%r<3>;

	mov.u32 	%r2, 0;		// payload slot index
	mov.u32 	%r1, 1;		// value written: "hit"
	// begin inline asm
	call _optix_set_payload, (%r2, %r1);
	// end inline asm
	ret;
}