| // |
| // Generated by NVIDIA NVVM Compiler |
| // |
| // Compiler Build ID: CL-29618528 |
| // Cuda compilation tools, release 11.2, V11.2.152 |
| // Based on NVVM 7.0.1 |
| // |
|
|
| .version 7.2 |
| .target sm_52 |
| .address_size 64 |
|
|
| // .globl __raygen__rg |
// Launch-parameter buffer: 32 bytes in the .const state space, 8-byte aligned.
// The portion of __raygen__rg reviewed here reads 64-bit values at three offsets:
//   +0  : converted to a global address and indexed by launch index x with a
//         12-byte stride (three consecutive f32 loads).
//   +8  : converted to a global address and indexed by the first value returned
//         from the trace call with a 36-byte stride (nine consecutive f32 loads).
//   +24 : passed unchanged as the second argument of _optix_trace_typed_32.
// NOTE(review): offset +16 is not read in the reviewed portion, and the host-side
// struct layout is not visible here — confirm field meanings against the host code.
| .const .align 8 .b8 params[32]; |
// Function-local static constants. Per the Itanium-mangled names these belong to
//   tcnn::gaussian_cdf_approx(float, float)::MAGIC_SIGMOID_FACTOR and
//   tcnn::gaussian_cdf_approx_derivative(float, float)::MAGIC_SIGMOID_FACTOR.
// Both carry the same f32 bit pattern 0x3F4ABDDD (approximately 0.79196).
// Neither symbol is referenced in the portion of __raygen__rg reviewed here.
| .global .align 4 .f32 _ZZN4tcnn19gaussian_cdf_approxEffE20MAGIC_SIGMOID_FACTOR = 0f3F4ABDDD; |
| .global .align 4 .f32 _ZZN4tcnn30gaussian_cdf_approx_derivativeEffE20MAGIC_SIGMOID_FACTOR = 0f3F4ABDDD; |
// Function-local static table; per the mangled name this is
// ngp::sobol(unsigned int, unsigned int)::directions.
// 640 bytes, 4-byte aligned. Read as little-endian 32-bit words that is 160
// entries; the first 32 words are single-bit values descending from 0x80000000
// to 0x00000001.
// NOTE(review): presumably 5 dimensions x 32 direction numbers — confirm against
// the ngp source. The initializer is split across two physical lines in this file.
// Not referenced in the portion of __raygen__rg reviewed here.
| .global .align 4 .b8 _ZZN3ngp5sobolEjjE10directions[640] = {0, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 160, 0, 0, 0, 240, 0, 0, 0, 136, 0, 0, 0, 204, 0, 0, 0, 170, 0, 0, 0, 255, 0, 0, 128, 128, 0, 0, 192, 192, 0, 0, 160, 160, 0, 0, 240, 240, 0, 0, 136, 136, 0, 0, 204, 204, 0, 0, 170, 170, 0, 0, 255, 255, 0, 128, 0, 128, 0, 192, 0, 192, 0, 160, 0, 160, 0, 240, 0, 240, 0, 136, 0, 136, 0, 204, 0, 204, 0, 170, 0, 170, 0, 255, 0, 255, 128, 128, 128, 128, 192, 192, 192, 192, 160, 160, 160, 160, 240, 240, 240, 240, 136, 136, 136, 136, 204, 204, 204, 204, 170, 170, 170, 170, 255, 255, 255, 255, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 96, 0, 0, 0, 144, 0, 0, 0, 232, 0, 0, 0, 92, 0, 0, 0, 142, 0, 0, 0, 197, 0, 0, 128, 104, 0, 0, 192, 156, 0, 0, 96, 238, 0, 0, 144, 85, 0, 0, 104, 128, 0, 0, 156, 192, 0, 0, 238, 96, 0, 0, 85, 144, 0, 128, 128, 232, 0, 192, 192, 92, 0, 96, 96, 142, 0, 144, 144, 197, 0, 232, 104, 104, 0, 92, 156, 156, 0, 142, 238, 238, 0, 197, 85, 85, 128, 232, 0, 128, 192, 92, 0, 192, 96, 142, 0, 96, 144, 197, 0, 144, 104, 104, 0, 232, 156, 156, 0, 92, 238, 238, 0, 142, 85, 85, 0, 197, 0, 0, 0, 128, 0, 0, 0, 192, 0, 0, 0, 32, 0, 0, 0, 80, 0, 0, 0, 248, 0, 0, 0, 116, 0, 0, 0, 162, 0, 0, 0, 147, 0, 0, 128, 216, 0, 0, 64, 37, 0, 0, 224, 89, 0, 0, 208, 230, 0, 0, 8, 120, 0, 0, 12, 180, 0, 0, 2, 130, 0, 0, 5, 195, 0, 128, 143, 32, 0, 64, 71, 81, 0, 32, 234, 251, 0, 48, 217, 117, 0, 136, 133, 160, 0, 84, 78, 145, 0, 158, 231, 219, 0, 109, 219, 37, 128, 0, 128, 88, 192, 0, 64, 229, 32, 0, 224, 121, 80, 0, 208, 182, 248, 0, 8, 128, 116, 0, 12, 192, 162, 0, 2, 32, 147, 0, 5, 80, 
0, 0, 0, 128, 0, 0, 0, 64, 0, 0, 0, 32, 0, 0, 0, 176, 0, 0, 0, 248, 0, 0, 0, 220, 0, 0, 0, 122, 0, 0, 0, 157, 0, 0, 128, 90, 0, 0, 192, 47, 0, 0, 96, 161, 0, 0, 176, 240, 0, 0, 136, 218, 0, 0, 196, 111, 0, 0, 98, 129, 0, 0, 187, 64, 0, 128, 135, 34, 0, 192, 201, 179, 0, 160, 101, 251, 0, 208, 178, 221, 0, 40, 2, 120, 0, 60, 11, 156, 0, 182, 15, 90, 0, 219, 13, 45, 128, 128, 135, 162, 64, 192, 201, 243, 32, 160, 101, 219, 176, 208, 178, 109, 248, 40, 2, 128, 220, 60, 11, 64, 122, 182, 15, 32, 157, 219, 13, 176}; |
// Function-local static constants of __raygen__rg (per the mangled names),
// emitted as 4-byte-aligned u32 globals: N_PATHS = 32 and N_BOUNCES = 4.
// Neither symbol is loaded in the portion of the kernel reviewed here.
// NOTE(review): how these counts shape the kernel's loops is not visible in the
// reviewed portion — confirm against the CUDA source.
| .global .align 4 .u32 _ZZ12__raygen__rgE7N_PATHS = 32; |
| .global .align 4 .u32 _ZZ12__raygen__rgE9N_BOUNCES = 4; |
// 24-byte table consumed as six 32-bit words by the large-argument reduction
// loops in __raygen__rg (e.g. LBB0_8, LBB0_21): each word is loaded with
// ld.global.nc.u32, multiplied by the angle's mantissa (shifted left 8 with the
// top bit forced on) via mad.wide.u32, and the low half of each product is
// stored to the local scratch buffer while the high half carries forward.
// Read as little-endian words the table is 0x3C439041, 0xDB629599, 0xF534DDC0,
// 0xFC2757D1, 0x4E441529, 0xA2F9836E; taken from the last word to the first,
// these are the leading hexadecimal digits of the fraction 2/pi.
| .global .align 4 .b8 __cudart_i2opi_f[24] = {65, 144, 67, 60, 153, 149, 98, 219, 192, 221, 52, 245, 209, 87, 39, 252, 41, 21, 68, 78, 110, 131, 249, 162}; |
|
|
| .visible .entry __raygen__rg() |
| { |
| .local .align 4 .b8 __local_depot0[28]; |
| .reg .b64 %SP; |
| .reg .b64 %SPL; |
| .reg .pred %p<125>; |
| .reg .f32 %f<701>; |
| .reg .b32 %r<741>; |
| .reg .f64 %fd<15>; |
| .reg .b64 %rd<312>; |
|
|
|
|
| mov.u64 %SPL, __local_depot0; |
| add.u64 %rd1, %SPL, 0; |
| // begin inline asm |
| call (%r191), _optix_get_launch_index_x, (); |
| // end inline asm |
| mov.u64 %rd280, 0; |
| ld.const.u64 %rd106, [params]; |
| cvta.to.global.u64 %rd107, %rd106; |
| cvt.u64.u32 %rd2, %r191; |
| mul.wide.u32 %rd108, %r191, 12; |
| add.s64 %rd109, %rd107, %rd108; |
| ld.global.f32 %f1, [%rd109]; |
| ld.global.f32 %f2, [%rd109+4]; |
| ld.global.f32 %f3, [%rd109+8]; |
| shl.b32 %r1, %r191, 9; |
| setp.eq.s32 %p5, %r1, 0; |
| mov.u64 %rd281, -8846114313915602277; |
| @%p5 bra LBB0_4; |
|
|
| cvt.u64.u32 %rd279, %r1; |
| mov.u64 %rd280, 0; |
| mov.u64 %rd277, 1; |
| mov.u64 %rd276, -2720673578348880933; |
| mov.u64 %rd275, 6364136223846793005; |
|
|
| LBB0_2: |
| and.b64 %rd114, %rd279, 1; |
| setp.eq.b64 %p6, %rd114, 1; |
| mul.lo.s64 %rd115, %rd275, %rd280; |
| add.s64 %rd116, %rd115, %rd276; |
| selp.b64 %rd280, %rd116, %rd280, %p6; |
| selp.b64 %rd117, %rd275, 1, %p6; |
| mul.lo.s64 %rd277, %rd117, %rd277; |
| add.s64 %rd118, %rd275, 1; |
| mul.lo.s64 %rd276, %rd118, %rd276; |
| mul.lo.s64 %rd275, %rd275, %rd275; |
| shr.u64 %rd279, %rd279, 1; |
| setp.ne.s64 %p7, %rd279, 0; |
| @%p7 bra LBB0_2; |
|
|
| mul.lo.s64 %rd281, %rd277, -8846114313915602277; |
|
|
| LBB0_4: |
| add.s64 %rd310, %rd281, %rd280; |
| ld.const.u64 %rd18, [params+24]; |
| ld.const.u64 %rd119, [params+8]; |
| cvta.to.global.u64 %rd19, %rd119; |
| mov.u32 %r693, 0; |
| mov.f32 %f172, 0f3F800000; |
| mov.f32 %f176, 0fBFC90FDA; |
| mov.f32 %f178, 0fB3A22168; |
| mov.f32 %f180, 0fA7C234C5; |
| add.s64 %rd22, %rd1, 24; |
| mov.u32 %r694, %r693; |
|
|
| LBB0_5: |
| mul.lo.s64 %rd120, %rd310, 6364136223846793005; |
| add.s64 %rd21, %rd120, -2720673578348880933; |
| shr.u64 %rd121, %rd310, 18; |
| xor.b64 %rd122, %rd121, %rd310; |
| shr.u64 %rd123, %rd122, 27; |
| cvt.u32.u64 %r200, %rd123; |
| shr.u64 %rd124, %rd310, 59; |
| cvt.u32.u64 %r201, %rd124; |
| shf.r.wrap.b32 %r202, %r200, %r200, %r201; |
| shr.u32 %r203, %r202, 9; |
| or.b32 %r204, %r203, 1065353216; |
| mov.b32 %f167, %r204; |
| add.f32 %f168, %f167, 0fBF800000; |
| shr.u64 %rd125, %rd21, 18; |
| xor.b64 %rd126, %rd125, %rd21; |
| shr.u64 %rd127, %rd126, 27; |
| cvt.u32.u64 %r205, %rd127; |
| shr.u64 %rd128, %rd21, 59; |
| cvt.u32.u64 %r206, %rd128; |
| shf.r.wrap.b32 %r207, %r205, %r205, %r206; |
| shr.u32 %r208, %r207, 9; |
| or.b32 %r209, %r208, 1065353216; |
| mov.b32 %f169, %r209; |
| add.f32 %f170, %f169, 0fBF800000; |
| add.f32 %f171, %f168, %f168; |
| sub.f32 %f4, %f172, %f171; |
| add.f32 %f173, %f170, 0fBF000000; |
| mul.f32 %f5, %f173, 0f40C90FDB; |
| mul.f32 %f174, %f5, 0f3F22F983; |
| cvt.rni.s32.f32 %r702, %f174; |
| cvt.rn.f32.s32 %f175, %r702; |
| fma.rn.f32 %f177, %f175, %f176, %f5; |
| fma.rn.f32 %f179, %f175, %f178, %f177; |
| fma.rn.f32 %f664, %f175, %f180, %f179; |
| abs.f32 %f7, %f5; |
| setp.leu.f32 %p8, %f7, 0f47CE4780; |
| @%p8 bra LBB0_15; |
|
|
| setp.eq.f32 %p9, %f7, 0f7F800000; |
| @%p9 bra LBB0_14; |
| bra.uni LBB0_7; |
|
|
| LBB0_14: |
| mov.f32 %f183, 0f00000000; |
| mul.rn.f32 %f664, %f5, %f183; |
| bra.uni LBB0_15; |
|
|
| LBB0_7: |
| mov.b32 %r6, %f5; |
| bfe.u32 %r211, %r6, 23, 8; |
| add.s32 %r7, %r211, -128; |
| shl.b32 %r212, %r6, 8; |
| or.b32 %r8, %r212, -2147483648; |
| shr.u32 %r9, %r7, 5; |
| mov.u64 %rd285, 0; |
| mov.u32 %r696, 0; |
| mov.u64 %rd283, __cudart_i2opi_f; |
| mov.u64 %rd284, %rd1; |
|
|
| LBB0_8: |
| .pragma "nounroll"; |
| ld.global.nc.u32 %r213, [%rd283]; |
| mad.wide.u32 %rd131, %r213, %r8, %rd285; |
| shr.u64 %rd285, %rd131, 32; |
| st.local.u32 [%rd284], %rd131; |
| add.s64 %rd284, %rd284, 4; |
| add.s64 %rd283, %rd283, 4; |
| add.s32 %r696, %r696, 1; |
| setp.ne.s32 %p10, %r696, 6; |
| @%p10 bra LBB0_8; |
|
|
| and.b32 %r12, %r6, -2147483648; |
| st.local.u32 [%rd22], %rd285; |
| mul.wide.u32 %rd132, %r9, 4; |
| sub.s64 %rd29, %rd1, %rd132; |
| ld.local.u32 %r697, [%rd29+24]; |
| ld.local.u32 %r698, [%rd29+20]; |
| and.b32 %r15, %r7, 31; |
| setp.eq.s32 %p11, %r15, 0; |
| @%p11 bra LBB0_11; |
|
|
| mov.u32 %r214, 32; |
| sub.s32 %r215, %r214, %r15; |
| shr.u32 %r216, %r698, %r215; |
| shl.b32 %r217, %r697, %r15; |
| add.s32 %r697, %r216, %r217; |
| ld.local.u32 %r218, [%rd29+16]; |
| shr.u32 %r219, %r218, %r215; |
| shl.b32 %r220, %r698, %r15; |
| add.s32 %r698, %r219, %r220; |
|
|
| LBB0_11: |
| shr.u32 %r221, %r698, 30; |
| shl.b32 %r222, %r697, 2; |
| or.b32 %r700, %r221, %r222; |
| shl.b32 %r701, %r698, 2; |
| shr.u32 %r223, %r700, 31; |
| shr.u32 %r224, %r697, 30; |
| add.s32 %r22, %r223, %r224; |
| setp.eq.s32 %p12, %r223, 0; |
| mov.u32 %r699, %r12; |
| @%p12 bra LBB0_13; |
|
|
| not.b32 %r225, %r700; |
| neg.s32 %r23, %r701; |
| setp.eq.s32 %p13, %r701, 0; |
| selp.u32 %r226, 1, 0, %p13; |
| add.s32 %r700, %r226, %r225; |
| xor.b32 %r699, %r12, -2147483648; |
| mov.u32 %r701, %r23; |
|
|
| LBB0_13: |
| cvt.u64.u32 %rd133, %r700; |
| cvt.u64.u32 %rd134, %r701; |
| bfi.b64 %rd135, %rd133, %rd134, 32, 32; |
| cvt.rn.f64.s64 %fd1, %rd135; |
| mul.f64 %fd2, %fd1, 0d3BF921FB54442D19; |
| cvt.rn.f32.f64 %f181, %fd2; |
| neg.f32 %f182, %f181; |
| setp.eq.s32 %p14, %r699, 0; |
| selp.f32 %f664, %f181, %f182, %p14; |
| setp.eq.s32 %p15, %r12, 0; |
| neg.s32 %r227, %r22; |
| selp.b32 %r702, %r22, %r227, %p15; |
|
|
| LBB0_15: |
| mul.f32 %f193, %f4, %f4; |
| sub.f32 %f195, %f172, %f193; |
| mov.f32 %f192, 0f00000000; |
| max.f32 %f196, %f195, %f192; |
| sqrt.rn.f32 %f197, %f196; |
| mul.f32 %f198, %f664, %f664; |
| mov.f32 %f199, 0fBAB607ED; |
| mov.f32 %f200, 0f37CBAC00; |
| fma.rn.f32 %f201, %f200, %f198, %f199; |
| mov.f32 %f202, 0f3D2AAABB; |
| fma.rn.f32 %f203, %f201, %f198, %f202; |
| mov.f32 %f204, 0fBEFFFFFF; |
| fma.rn.f32 %f205, %f203, %f198, %f204; |
| fma.rn.f32 %f206, %f205, %f198, %f172; |
| fma.rn.f32 %f207, %f198, %f664, %f192; |
| mov.f32 %f208, 0f3C0885E4; |
| mov.f32 %f669, 0fB94D4153; |
| fma.rn.f32 %f210, %f669, %f198, %f208; |
| mov.f32 %f211, 0fBE2AAAA8; |
| fma.rn.f32 %f212, %f210, %f198, %f211; |
| fma.rn.f32 %f213, %f212, %f207, %f664; |
| mov.u32 %r266, 1; |
| and.b32 %r299, %r702, 1; |
| setp.eq.b32 %p16, %r299, 1; |
| mov.u32 %r298, 0; |
| selp.f32 %f214, %f206, %f213, %p16; |
| selp.f32 %f215, %f213, %f206, %p16; |
| and.b32 %r300, %r702, 2; |
| setp.eq.s32 %p17, %r300, 0; |
| neg.f32 %f216, %f214; |
| selp.f32 %f217, %f214, %f216, %p17; |
| add.s32 %r301, %r702, 1; |
| and.b32 %r302, %r301, 2; |
| setp.eq.s32 %p18, %r302, 0; |
| neg.f32 %f218, %f215; |
| selp.f32 %f219, %f215, %f218, %p18; |
| mul.f32 %f187, %f197, %f219; |
| mul.f32 %f188, %f197, %f217; |
| mul.lo.s64 %rd137, %rd21, 6364136223846793005; |
| add.s64 %rd310, %rd137, -2720673578348880933; |
| mov.f32 %f191, 0f5A0E1BCA; |
| mov.u32 %r261, 255; |
| // begin inline asm |
| call(%r228,%r229,%r230,%r231,%r232,%r233,%r234,%r235,%r236,%r237,%r238,%r239,%r240,%r241,%r242,%r243,%r244,%r245,%r246,%r247,%r248,%r249,%r250,%r251,%r252,%r253,%r254,%r255,%r256,%r257,%r258,%r259),_optix_trace_typed_32,(%r298,%rd18,%f1,%f2,%f3,%f187,%f188,%f4,%f192,%f191,%f192,%r261,%r266,%r298,%r266,%r298,%r266,%r695,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298); |
| // end inline asm |
| setp.eq.s32 %p19, %r228, -1; |
| @%p19 bra LBB0_124; |
|
|
| mul.wide.u32 %rd138, %r228, 36; |
| add.s64 %rd139, %rd19, %rd138; |
| ld.global.f32 %f220, [%rd139]; |
| ld.global.f32 %f221, [%rd139+12]; |
| sub.f32 %f222, %f221, %f220; |
| ld.global.f32 %f223, [%rd139+4]; |
| ld.global.f32 %f224, [%rd139+16]; |
| sub.f32 %f225, %f224, %f223; |
| ld.global.f32 %f226, [%rd139+8]; |
| ld.global.f32 %f227, [%rd139+20]; |
| sub.f32 %f228, %f227, %f226; |
| ld.global.f32 %f229, [%rd139+24]; |
| sub.f32 %f230, %f229, %f220; |
| ld.global.f32 %f231, [%rd139+28]; |
| sub.f32 %f232, %f231, %f223; |
| ld.global.f32 %f233, [%rd139+32]; |
| sub.f32 %f234, %f233, %f226; |
| sub.f32 %f235, %f1, %f220; |
| sub.f32 %f236, %f2, %f223; |
| sub.f32 %f237, %f3, %f226; |
| mul.f32 %f238, %f225, %f234; |
| mul.f32 %f239, %f228, %f232; |
| sub.f32 %f240, %f238, %f239; |
| mul.f32 %f241, %f228, %f230; |
| mul.f32 %f242, %f222, %f234; |
| sub.f32 %f243, %f241, %f242; |
| mul.f32 %f244, %f222, %f232; |
| mul.f32 %f245, %f225, %f230; |
| sub.f32 %f246, %f244, %f245; |
| mul.f32 %f247, %f4, %f236; |
| mul.f32 %f248, %f188, %f237; |
| sub.f32 %f249, %f247, %f248; |
| mul.f32 %f250, %f187, %f237; |
| mul.f32 %f251, %f4, %f235; |
| sub.f32 %f252, %f250, %f251; |
| mul.f32 %f253, %f188, %f235; |
| mul.f32 %f254, %f187, %f236; |
| sub.f32 %f255, %f253, %f254; |
| mul.f32 %f256, %f187, %f240; |
| mul.f32 %f257, %f188, %f243; |
| mul.f32 %f258, %f4, %f246; |
| add.f32 %f259, %f258, %f257; |
| add.f32 %f260, %f256, %f259; |
| rcp.rn.f32 %f261, %f260; |
| mul.f32 %f262, %f234, %f255; |
| fma.rn.f32 %f263, %f232, %f252, %f262; |
| fma.rn.f32 %f264, %f230, %f249, %f263; |
| mul.f32 %f265, %f261, %f264; |
| mul.f32 %f266, %f228, %f255; |
| fma.rn.f32 %f267, %f225, %f252, %f266; |
| fma.rn.f32 %f268, %f222, %f249, %f267; |
| mul.f32 %f269, %f261, %f268; |
| mul.f32 %f270, %f243, %f236; |
| fma.rn.f32 %f271, %f246, %f237, %f270; |
| fma.rn.f32 %f272, %f235, %f240, %f271; |
| mul.f32 %f13, %f272, %f261; |
| setp.gt.f32 %p20, %f265, 0f80000000; |
| setp.lt.f32 %p21, %f265, 0fBF800000; |
| or.pred %p22, %p20, %p21; |
| setp.lt.f32 %p23, %f269, 0f00000000; |
| or.pred %p24, %p23, %p22; |
| sub.f32 %f273, %f269, %f265; |
| setp.gt.f32 %p25, %f273, 0f3F800000; |
| or.pred %p1, %p25, %p24; |
| neg.f32 %f274, %f258; |
| sub.f32 %f275, %f274, %f257; |
| sub.f32 %f276, %f275, %f256; |
| mov.b32 %r303, %f276; |
| and.b32 %r304, %r303, -2147483648; |
| or.b32 %r305, %r304, 1065353216; |
| mov.b32 %f277, %r305; |
| mul.f32 %f666, %f240, %f277; |
| mul.f32 %f665, %f243, %f277; |
| mul.f32 %f667, %f246, %f277; |
| mul.f32 %f278, %f667, %f667; |
| fma.rn.f32 %f279, %f665, %f665, %f278; |
| fma.rn.f32 %f17, %f666, %f666, %f279; |
| setp.leu.f32 %p26, %f17, 0f00000000; |
| @%p26 bra LBB0_18; |
|
|
| sqrt.rn.f32 %f280, %f17; |
| div.rn.f32 %f666, %f666, %f280; |
| div.rn.f32 %f665, %f665, %f280; |
| div.rn.f32 %f667, %f667, %f280; |
|
|
| LBB0_18: |
| mov.f32 %f281, 0fBA83126F; |
| sub.f32 %f282, %f281, %f13; |
| setp.gt.f32 %p27, %f13, 0f80000000; |
| selp.f32 %f283, 0f7F7FFFFF, %f282, %p1; |
| selp.f32 %f284, 0f7F7FFFFF, %f283, %p27; |
| max.f32 %f286, %f192, %f284; |
| fma.rn.f32 %f24, %f187, %f286, %f1; |
| fma.rn.f32 %f25, %f188, %f286, %f2; |
| fma.rn.f32 %f26, %f4, %f286, %f3; |
| mul.lo.s64 %rd140, %rd310, 6364136223846793005; |
| add.s64 %rd31, %rd140, -2720673578348880933; |
| shr.u64 %rd141, %rd310, 18; |
| xor.b64 %rd142, %rd141, %rd310; |
| shr.u64 %rd143, %rd142, 27; |
| cvt.u32.u64 %r306, %rd143; |
| shr.u64 %rd144, %rd310, 59; |
| cvt.u32.u64 %r307, %rd144; |
| shf.r.wrap.b32 %r308, %r306, %r306, %r307; |
| shr.u32 %r309, %r308, 9; |
| or.b32 %r310, %r309, 1065353216; |
| mov.b32 %f287, %r310; |
| add.f32 %f288, %f287, 0fBF800000; |
| shr.u64 %rd145, %rd31, 18; |
| xor.b64 %rd146, %rd145, %rd31; |
| shr.u64 %rd147, %rd146, 27; |
| cvt.u32.u64 %r311, %rd147; |
| shr.u64 %rd148, %rd31, 59; |
| cvt.u32.u64 %r312, %rd148; |
| shf.r.wrap.b32 %r313, %r311, %r311, %r312; |
| shr.u32 %r314, %r313, 9; |
| or.b32 %r315, %r314, 1065353216; |
| mov.b32 %f289, %r315; |
| add.f32 %f290, %f289, 0fBF800000; |
| sqrt.rn.f32 %f27, %f288; |
| mul.f32 %f28, %f290, 0f40C90FDB; |
| mul.f32 %f291, %f28, 0f3F22F983; |
| cvt.rni.s32.f32 %r714, %f291; |
| cvt.rn.f32.s32 %f292, %r714; |
| fma.rn.f32 %f294, %f292, %f176, %f28; |
| fma.rn.f32 %f296, %f292, %f178, %f294; |
| fma.rn.f32 %f671, %f292, %f180, %f296; |
| abs.f32 %f30, %f28; |
| setp.leu.f32 %p28, %f30, 0f47CE4780; |
| mov.u32 %r708, %r714; |
| mov.f32 %f668, %f671; |
| @%p28 bra LBB0_28; |
|
|
| setp.eq.f32 %p29, %f30, 0f7F800000; |
| @%p29 bra LBB0_27; |
| bra.uni LBB0_20; |
|
|
| LBB0_27: |
| mul.rn.f32 %f668, %f28, %f192; |
| mov.u32 %r708, %r714; |
| bra.uni LBB0_28; |
|
|
| LBB0_20: |
| mov.b32 %r33, %f28; |
| bfe.u32 %r316, %r33, 23, 8; |
| add.s32 %r34, %r316, -128; |
| shl.b32 %r317, %r33, 8; |
| or.b32 %r35, %r317, -2147483648; |
| shr.u32 %r36, %r34, 5; |
| mov.u64 %rd287, 0; |
| mov.u64 %rd286, %rd1; |
| mov.u64 %rd288, %rd287; |
|
|
| LBB0_21: |
| .pragma "nounroll"; |
| shl.b64 %rd151, %rd287, 2; |
| mov.u64 %rd152, __cudart_i2opi_f; |
| add.s64 %rd153, %rd152, %rd151; |
| ld.global.nc.u32 %r318, [%rd153]; |
| mad.wide.u32 %rd154, %r318, %r35, %rd288; |
| shr.u64 %rd288, %rd154, 32; |
| st.local.u32 [%rd286], %rd154; |
| cvt.u32.u64 %r319, %rd287; |
| add.s32 %r320, %r319, 1; |
| cvt.u64.u32 %rd287, %r320; |
| mul.wide.u32 %rd155, %r319, 4; |
| add.s64 %rd156, %rd1, %rd155; |
| add.s64 %rd286, %rd156, 4; |
| setp.ne.s32 %p30, %r320, 6; |
| @%p30 bra LBB0_21; |
|
|
| and.b32 %r37, %r33, -2147483648; |
| st.local.u32 [%rd22], %rd288; |
| mul.wide.u32 %rd157, %r36, 4; |
| sub.s64 %rd39, %rd1, %rd157; |
| ld.local.u32 %r703, [%rd39+24]; |
| ld.local.u32 %r704, [%rd39+20]; |
| and.b32 %r40, %r34, 31; |
| setp.eq.s32 %p31, %r40, 0; |
| @%p31 bra LBB0_24; |
|
|
| mov.u32 %r321, 32; |
| sub.s32 %r322, %r321, %r40; |
| shr.u32 %r323, %r704, %r322; |
| shl.b32 %r324, %r703, %r40; |
| add.s32 %r703, %r323, %r324; |
| ld.local.u32 %r325, [%rd39+16]; |
| shr.u32 %r326, %r325, %r322; |
| shl.b32 %r327, %r704, %r40; |
| add.s32 %r704, %r326, %r327; |
|
|
| LBB0_24: |
| shr.u32 %r328, %r704, 30; |
| shl.b32 %r329, %r703, 2; |
| or.b32 %r706, %r328, %r329; |
| shl.b32 %r707, %r704, 2; |
| shr.u32 %r330, %r706, 31; |
| shr.u32 %r331, %r703, 30; |
| add.s32 %r47, %r330, %r331; |
| setp.eq.s32 %p32, %r330, 0; |
| mov.u32 %r705, %r37; |
| @%p32 bra LBB0_26; |
|
|
| not.b32 %r332, %r706; |
| neg.s32 %r48, %r707; |
| setp.eq.s32 %p33, %r707, 0; |
| selp.u32 %r333, 1, 0, %p33; |
| add.s32 %r706, %r333, %r332; |
| xor.b32 %r705, %r37, -2147483648; |
| mov.u32 %r707, %r48; |
|
|
| LBB0_26: |
| cvt.u64.u32 %rd158, %r706; |
| cvt.u64.u32 %rd159, %r707; |
| bfi.b64 %rd160, %rd158, %rd159, 32, 32; |
| cvt.rn.f64.s64 %fd3, %rd160; |
| mul.f64 %fd4, %fd3, 0d3BF921FB54442D19; |
| cvt.rn.f32.f64 %f298, %fd4; |
| neg.f32 %f299, %f298; |
| setp.eq.s32 %p34, %r705, 0; |
| selp.f32 %f668, %f298, %f299, %p34; |
| setp.eq.s32 %p35, %r37, 0; |
| neg.s32 %r334, %r47; |
| selp.b32 %r708, %r47, %r334, %p35; |
|
|
| LBB0_28: |
| add.s32 %r56, %r708, 1; |
| and.b32 %r57, %r56, 1; |
| setp.eq.s32 %p36, %r57, 0; |
| selp.f32 %f34, %f668, 0f3F800000, %p36; |
| mul.rn.f32 %f35, %f668, %f668; |
| @%p36 bra LBB0_30; |
|
|
| fma.rn.f32 %f669, %f200, %f35, %f199; |
|
|
| LBB0_30: |
| selp.f32 %f304, 0f3C0885E4, 0f3D2AAABB, %p36; |
| fma.rn.f32 %f305, %f669, %f35, %f304; |
| selp.f32 %f306, 0fBE2AAAA8, 0fBEFFFFFF, %p36; |
| fma.rn.f32 %f307, %f305, %f35, %f306; |
| fma.rn.f32 %f309, %f35, %f34, %f192; |
| fma.rn.f32 %f670, %f307, %f309, %f34; |
| and.b32 %r335, %r56, 2; |
| setp.eq.s32 %p38, %r335, 0; |
| @%p38 bra LBB0_32; |
|
|
| mov.f32 %f311, 0fBF800000; |
| fma.rn.f32 %f670, %f670, %f311, %f192; |
|
|
| LBB0_32: |
| @%p28 bra LBB0_42; |
|
|
| setp.eq.f32 %p40, %f30, 0f7F800000; |
| @%p40 bra LBB0_41; |
| bra.uni LBB0_34; |
|
|
| LBB0_41: |
| mul.rn.f32 %f671, %f28, %f192; |
| bra.uni LBB0_42; |
|
|
| LBB0_34: |
| mov.b32 %r58, %f28; |
| bfe.u32 %r336, %r58, 23, 8; |
| add.s32 %r59, %r336, -128; |
| shl.b32 %r337, %r58, 8; |
| or.b32 %r60, %r337, -2147483648; |
| shr.u32 %r61, %r59, 5; |
| mov.u64 %rd290, 0; |
| mov.u64 %rd289, %rd1; |
| mov.u64 %rd291, %rd290; |
|
|
| LBB0_35: |
| .pragma "nounroll"; |
| shl.b64 %rd163, %rd290, 2; |
| mov.u64 %rd164, __cudart_i2opi_f; |
| add.s64 %rd165, %rd164, %rd163; |
| ld.global.nc.u32 %r338, [%rd165]; |
| mad.wide.u32 %rd166, %r338, %r60, %rd291; |
| shr.u64 %rd291, %rd166, 32; |
| st.local.u32 [%rd289], %rd166; |
| cvt.u32.u64 %r339, %rd290; |
| add.s32 %r340, %r339, 1; |
| cvt.u64.u32 %rd290, %r340; |
| mul.wide.u32 %rd167, %r339, 4; |
| add.s64 %rd168, %rd1, %rd167; |
| add.s64 %rd289, %rd168, 4; |
| setp.ne.s32 %p41, %r340, 6; |
| @%p41 bra LBB0_35; |
|
|
| and.b32 %r62, %r58, -2147483648; |
| st.local.u32 [%rd22], %rd291; |
| mul.wide.u32 %rd169, %r61, 4; |
| sub.s64 %rd47, %rd1, %rd169; |
| ld.local.u32 %r709, [%rd47+24]; |
| ld.local.u32 %r710, [%rd47+20]; |
| and.b32 %r65, %r59, 31; |
| setp.eq.s32 %p42, %r65, 0; |
| @%p42 bra LBB0_38; |
|
|
| mov.u32 %r341, 32; |
| sub.s32 %r342, %r341, %r65; |
| shr.u32 %r343, %r710, %r342; |
| shl.b32 %r344, %r709, %r65; |
| add.s32 %r709, %r343, %r344; |
| ld.local.u32 %r345, [%rd47+16]; |
| shr.u32 %r346, %r345, %r342; |
| shl.b32 %r347, %r710, %r65; |
| add.s32 %r710, %r346, %r347; |
|
|
| LBB0_38: |
| shr.u32 %r348, %r710, 30; |
| shl.b32 %r349, %r709, 2; |
| or.b32 %r712, %r348, %r349; |
| shl.b32 %r713, %r710, 2; |
| shr.u32 %r350, %r712, 31; |
| shr.u32 %r351, %r709, 30; |
| add.s32 %r72, %r350, %r351; |
| setp.eq.s32 %p43, %r350, 0; |
| mov.u32 %r711, %r62; |
| @%p43 bra LBB0_40; |
|
|
| not.b32 %r352, %r712; |
| neg.s32 %r73, %r713; |
| setp.eq.s32 %p44, %r713, 0; |
| selp.u32 %r353, 1, 0, %p44; |
| add.s32 %r712, %r353, %r352; |
| xor.b32 %r711, %r62, -2147483648; |
| mov.u32 %r713, %r73; |
|
|
| LBB0_40: |
| cvt.u64.u32 %rd170, %r712; |
| cvt.u64.u32 %rd171, %r713; |
| bfi.b64 %rd172, %rd170, %rd171, 32, 32; |
| cvt.rn.f64.s64 %fd5, %rd172; |
| mul.f64 %fd6, %fd5, 0d3BF921FB54442D19; |
| cvt.rn.f32.f64 %f312, %fd6; |
| neg.f32 %f313, %f312; |
| setp.eq.s32 %p45, %r711, 0; |
| selp.f32 %f671, %f312, %f313, %p45; |
| setp.eq.s32 %p46, %r62, 0; |
| neg.s32 %r354, %r72; |
| selp.b32 %r714, %r72, %r354, %p46; |
|
|
| LBB0_42: |
| mul.f32 %f44, %f27, %f670; |
| and.b32 %r81, %r714, 1; |
| setp.eq.s32 %p47, %r81, 0; |
| selp.f32 %f45, %f671, 0f3F800000, %p47; |
| mul.rn.f32 %f46, %f671, %f671; |
| mov.f32 %f672, 0fB94D4153; |
| @%p47 bra LBB0_44; |
|
|
| fma.rn.f32 %f672, %f200, %f46, %f199; |
|
|
| LBB0_44: |
| selp.f32 %f318, 0f3C0885E4, 0f3D2AAABB, %p47; |
| fma.rn.f32 %f319, %f672, %f46, %f318; |
| selp.f32 %f320, 0fBE2AAAA8, 0fBEFFFFFF, %p47; |
| fma.rn.f32 %f321, %f319, %f46, %f320; |
| fma.rn.f32 %f323, %f46, %f45, %f192; |
| fma.rn.f32 %f673, %f321, %f323, %f45; |
| and.b32 %r355, %r714, 2; |
| setp.eq.s32 %p49, %r355, 0; |
| @%p49 bra LBB0_46; |
|
|
| mov.f32 %f325, 0fBF800000; |
| fma.rn.f32 %f673, %f673, %f325, %f192; |
|
|
| LBB0_46: |
| abs.f32 %f326, %f667; |
| abs.f32 %f327, %f666; |
| setp.gt.f32 %p50, %f327, %f326; |
| neg.f32 %f328, %f665; |
| selp.f32 %f674, %f328, 0f00000000, %p50; |
| neg.f32 %f329, %f667; |
| selp.f32 %f675, %f666, %f329, %p50; |
| selp.f32 %f676, 0f00000000, %f665, %p50; |
| mul.f32 %f330, %f676, %f676; |
| fma.rn.f32 %f331, %f675, %f675, %f330; |
| fma.rn.f32 %f55, %f674, %f674, %f331; |
| setp.leu.f32 %p51, %f55, 0f00000000; |
| @%p51 bra LBB0_48; |
|
|
| sqrt.rn.f32 %f332, %f55; |
| div.rn.f32 %f674, %f674, %f332; |
| div.rn.f32 %f675, %f675, %f332; |
| div.rn.f32 %f676, %f676, %f332; |
|
|
| LBB0_48: |
| mul.f32 %f342, %f27, %f673; |
| mul.f32 %f343, %f342, %f342; |
| mul.f32 %f344, %f44, %f44; |
| sub.f32 %f346, %f172, %f344; |
| sub.f32 %f347, %f346, %f343; |
| max.f32 %f348, %f192, %f347; |
| sqrt.rn.f32 %f349, %f348; |
| mul.f32 %f350, %f665, %f676; |
| mul.f32 %f351, %f667, %f675; |
| sub.f32 %f352, %f351, %f350; |
| mul.f32 %f353, %f667, %f674; |
| mul.f32 %f354, %f666, %f676; |
| sub.f32 %f355, %f354, %f353; |
| mul.f32 %f356, %f666, %f675; |
| mul.f32 %f357, %f665, %f674; |
| sub.f32 %f358, %f357, %f356; |
| mul.f32 %f359, %f44, %f352; |
| fma.rn.f32 %f360, %f342, %f674, %f359; |
| fma.rn.f32 %f62, %f666, %f349, %f360; |
| mul.f32 %f361, %f44, %f355; |
| fma.rn.f32 %f362, %f342, %f675, %f361; |
| fma.rn.f32 %f63, %f665, %f349, %f362; |
| mul.f32 %f363, %f44, %f358; |
| fma.rn.f32 %f364, %f342, %f676, %f363; |
| fma.rn.f32 %f64, %f667, %f349, %f364; |
| mul.lo.s64 %rd174, %rd31, 6364136223846793005; |
| add.s64 %rd310, %rd174, -2720673578348880933; |
| // begin inline asm |
| call(%r356,%r357,%r358,%r359,%r360,%r361,%r362,%r363,%r364,%r365,%r366,%r367,%r368,%r369,%r370,%r371,%r372,%r373,%r374,%r375,%r376,%r377,%r378,%r379,%r380,%r381,%r382,%r383,%r384,%r385,%r386,%r387),_optix_trace_typed_32,(%r298,%rd18,%f24,%f25,%f26,%f62,%f63,%f64,%f192,%f191,%f192,%r261,%r266,%r298,%r266,%r298,%r266,%r228,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298); |
| // end inline asm |
| setp.eq.s32 %p52, %r356, -1; |
| @%p52 bra LBB0_124; |
|
|
| mul.wide.u32 %rd175, %r356, 36; |
| add.s64 %rd176, %rd19, %rd175; |
| ld.global.f32 %f365, [%rd176]; |
| ld.global.f32 %f366, [%rd176+12]; |
| sub.f32 %f367, %f366, %f365; |
| ld.global.f32 %f368, [%rd176+4]; |
| ld.global.f32 %f369, [%rd176+16]; |
| sub.f32 %f370, %f369, %f368; |
| ld.global.f32 %f371, [%rd176+8]; |
| ld.global.f32 %f372, [%rd176+20]; |
| sub.f32 %f373, %f372, %f371; |
| ld.global.f32 %f374, [%rd176+24]; |
| sub.f32 %f375, %f374, %f365; |
| ld.global.f32 %f376, [%rd176+28]; |
| sub.f32 %f377, %f376, %f368; |
| ld.global.f32 %f378, [%rd176+32]; |
| sub.f32 %f379, %f378, %f371; |
| sub.f32 %f380, %f24, %f365; |
| sub.f32 %f381, %f25, %f368; |
| sub.f32 %f382, %f26, %f371; |
| mul.f32 %f383, %f370, %f379; |
| mul.f32 %f384, %f373, %f377; |
| sub.f32 %f385, %f383, %f384; |
| mul.f32 %f386, %f373, %f375; |
| mul.f32 %f387, %f367, %f379; |
| sub.f32 %f388, %f386, %f387; |
| mul.f32 %f389, %f367, %f377; |
| mul.f32 %f390, %f370, %f375; |
| sub.f32 %f391, %f389, %f390; |
| mul.f32 %f392, %f64, %f381; |
| mul.f32 %f393, %f63, %f382; |
| sub.f32 %f394, %f392, %f393; |
| mul.f32 %f395, %f62, %f382; |
| mul.f32 %f396, %f64, %f380; |
| sub.f32 %f397, %f395, %f396; |
| mul.f32 %f398, %f63, %f380; |
| mul.f32 %f399, %f62, %f381; |
| sub.f32 %f400, %f398, %f399; |
| mul.f32 %f401, %f62, %f385; |
| mul.f32 %f402, %f63, %f388; |
| mul.f32 %f403, %f64, %f391; |
| add.f32 %f404, %f403, %f402; |
| add.f32 %f405, %f401, %f404; |
| rcp.rn.f32 %f406, %f405; |
| mul.f32 %f407, %f379, %f400; |
| fma.rn.f32 %f408, %f377, %f397, %f407; |
| fma.rn.f32 %f409, %f375, %f394, %f408; |
| mul.f32 %f410, %f406, %f409; |
| mul.f32 %f411, %f373, %f400; |
| fma.rn.f32 %f412, %f370, %f397, %f411; |
| fma.rn.f32 %f413, %f367, %f394, %f412; |
| mul.f32 %f414, %f406, %f413; |
| mul.f32 %f415, %f388, %f381; |
| fma.rn.f32 %f416, %f391, %f382, %f415; |
| fma.rn.f32 %f417, %f380, %f385, %f416; |
| mul.f32 %f65, %f417, %f406; |
| setp.gt.f32 %p53, %f410, 0f80000000; |
| setp.lt.f32 %p54, %f410, 0fBF800000; |
| or.pred %p55, %p53, %p54; |
| setp.lt.f32 %p56, %f414, 0f00000000; |
| or.pred %p57, %p56, %p55; |
| sub.f32 %f418, %f414, %f410; |
| setp.gt.f32 %p58, %f418, 0f3F800000; |
| or.pred %p2, %p58, %p57; |
| neg.f32 %f419, %f403; |
| sub.f32 %f420, %f419, %f402; |
| sub.f32 %f421, %f420, %f401; |
| mov.b32 %r427, %f421; |
| and.b32 %r428, %r427, -2147483648; |
| or.b32 %r429, %r428, 1065353216; |
| mov.b32 %f422, %r429; |
| mul.f32 %f678, %f385, %f422; |
| mul.f32 %f677, %f388, %f422; |
| mul.f32 %f679, %f391, %f422; |
| mul.f32 %f423, %f679, %f679; |
| fma.rn.f32 %f424, %f677, %f677, %f423; |
| fma.rn.f32 %f69, %f678, %f678, %f424; |
| setp.leu.f32 %p59, %f69, 0f00000000; |
| @%p59 bra LBB0_51; |
|
|
| sqrt.rn.f32 %f425, %f69; |
| div.rn.f32 %f678, %f678, %f425; |
| div.rn.f32 %f677, %f677, %f425; |
| div.rn.f32 %f679, %f679, %f425; |
|
|
| LBB0_51: |
| mov.f32 %f660, 0fBA83126F; |
| sub.f32 %f427, %f660, %f65; |
| setp.gt.f32 %p60, %f65, 0f80000000; |
| selp.f32 %f428, 0f7F7FFFFF, %f427, %p2; |
| selp.f32 %f429, 0f7F7FFFFF, %f428, %p60; |
| max.f32 %f431, %f192, %f429; |
| fma.rn.f32 %f76, %f62, %f431, %f24; |
| fma.rn.f32 %f77, %f63, %f431, %f25; |
| fma.rn.f32 %f78, %f64, %f431, %f26; |
| mul.lo.s64 %rd177, %rd310, 6364136223846793005; |
| add.s64 %rd49, %rd177, -2720673578348880933; |
| shr.u64 %rd178, %rd310, 18; |
| xor.b64 %rd179, %rd178, %rd310; |
| shr.u64 %rd180, %rd179, 27; |
| cvt.u32.u64 %r430, %rd180; |
| shr.u64 %rd181, %rd310, 59; |
| cvt.u32.u64 %r431, %rd181; |
| shf.r.wrap.b32 %r432, %r430, %r430, %r431; |
| shr.u32 %r433, %r432, 9; |
| or.b32 %r434, %r433, 1065353216; |
| mov.b32 %f432, %r434; |
| add.f32 %f433, %f432, 0fBF800000; |
| shr.u64 %rd182, %rd49, 18; |
| xor.b64 %rd183, %rd182, %rd49; |
| shr.u64 %rd184, %rd183, 27; |
| cvt.u32.u64 %r435, %rd184; |
| shr.u64 %rd185, %rd49, 59; |
| cvt.u32.u64 %r436, %rd185; |
| shf.r.wrap.b32 %r437, %r435, %r435, %r436; |
| shr.u32 %r438, %r437, 9; |
| or.b32 %r439, %r438, 1065353216; |
| mov.b32 %f434, %r439; |
| add.f32 %f435, %f434, 0fBF800000; |
| sqrt.rn.f32 %f79, %f433; |
| mul.f32 %f80, %f435, 0f40C90FDB; |
| mul.f32 %f436, %f80, 0f3F22F983; |
| cvt.rni.s32.f32 %r726, %f436; |
| cvt.rn.f32.s32 %f437, %r726; |
| fma.rn.f32 %f439, %f437, %f176, %f80; |
| fma.rn.f32 %f441, %f437, %f178, %f439; |
| fma.rn.f32 %f683, %f437, %f180, %f441; |
| abs.f32 %f82, %f80; |
| setp.leu.f32 %p61, %f82, 0f47CE4780; |
| mov.u32 %r720, %r726; |
| mov.f32 %f680, %f683; |
| @%p61 bra LBB0_61; |
|
|
| setp.eq.f32 %p62, %f82, 0f7F800000; |
| @%p62 bra LBB0_60; |
| bra.uni LBB0_53; |
|
|
| LBB0_60: |
| mul.rn.f32 %f680, %f80, %f192; |
| mov.u32 %r720, %r726; |
| bra.uni LBB0_61; |
|
|
| LBB0_53: |
| mov.b32 %r84, %f80; |
| bfe.u32 %r440, %r84, 23, 8; |
| add.s32 %r85, %r440, -128; |
| shl.b32 %r441, %r84, 8; |
| or.b32 %r86, %r441, -2147483648; |
| shr.u32 %r87, %r85, 5; |
| mov.u64 %rd293, 0; |
| mov.u64 %rd292, %rd1; |
| mov.u64 %rd294, %rd293; |
|
|
| LBB0_54: |
| .pragma "nounroll"; |
| shl.b64 %rd188, %rd293, 2; |
| mov.u64 %rd189, __cudart_i2opi_f; |
| add.s64 %rd190, %rd189, %rd188; |
| ld.global.nc.u32 %r442, [%rd190]; |
| mad.wide.u32 %rd191, %r442, %r86, %rd294; |
| shr.u64 %rd294, %rd191, 32; |
| st.local.u32 [%rd292], %rd191; |
| cvt.u32.u64 %r443, %rd293; |
| add.s32 %r444, %r443, 1; |
| cvt.u64.u32 %rd293, %r444; |
| mul.wide.u32 %rd192, %r443, 4; |
| add.s64 %rd193, %rd1, %rd192; |
| add.s64 %rd292, %rd193, 4; |
| setp.ne.s32 %p63, %r444, 6; |
| @%p63 bra LBB0_54; |
|
|
| and.b32 %r88, %r84, -2147483648; |
| st.local.u32 [%rd22], %rd294; |
| mul.wide.u32 %rd194, %r87, 4; |
| sub.s64 %rd57, %rd1, %rd194; |
| ld.local.u32 %r715, [%rd57+24]; |
| ld.local.u32 %r716, [%rd57+20]; |
| and.b32 %r91, %r85, 31; |
| setp.eq.s32 %p64, %r91, 0; |
| @%p64 bra LBB0_57; |
|
|
| mov.u32 %r445, 32; |
| sub.s32 %r446, %r445, %r91; |
| shr.u32 %r447, %r716, %r446; |
| shl.b32 %r448, %r715, %r91; |
| add.s32 %r715, %r447, %r448; |
| ld.local.u32 %r449, [%rd57+16]; |
| shr.u32 %r450, %r449, %r446; |
| shl.b32 %r451, %r716, %r91; |
| add.s32 %r716, %r450, %r451; |
|
|
| LBB0_57: |
| shr.u32 %r452, %r716, 30; |
| shl.b32 %r453, %r715, 2; |
| or.b32 %r718, %r452, %r453; |
| shl.b32 %r719, %r716, 2; |
| shr.u32 %r454, %r718, 31; |
| shr.u32 %r455, %r715, 30; |
| add.s32 %r98, %r454, %r455; |
| setp.eq.s32 %p65, %r454, 0; |
| mov.u32 %r717, %r88; |
| @%p65 bra LBB0_59; |
|
|
| not.b32 %r456, %r718; |
| neg.s32 %r99, %r719; |
| setp.eq.s32 %p66, %r719, 0; |
| selp.u32 %r457, 1, 0, %p66; |
| add.s32 %r718, %r457, %r456; |
| xor.b32 %r717, %r88, -2147483648; |
| mov.u32 %r719, %r99; |
|
|
| LBB0_59: |
| cvt.u64.u32 %rd195, %r718; |
| cvt.u64.u32 %rd196, %r719; |
| bfi.b64 %rd197, %rd195, %rd196, 32, 32; |
| cvt.rn.f64.s64 %fd7, %rd197; |
| mul.f64 %fd8, %fd7, 0d3BF921FB54442D19; |
| cvt.rn.f32.f64 %f443, %fd8; |
| neg.f32 %f444, %f443; |
| setp.eq.s32 %p67, %r717, 0; |
| selp.f32 %f680, %f443, %f444, %p67; |
| setp.eq.s32 %p68, %r88, 0; |
| neg.s32 %r458, %r98; |
| selp.b32 %r720, %r98, %r458, %p68; |
|
|
| LBB0_61: |
| add.s32 %r107, %r720, 1; |
| and.b32 %r108, %r107, 1; |
| setp.eq.s32 %p69, %r108, 0; |
| selp.f32 %f86, %f680, 0f3F800000, %p69; |
| mul.rn.f32 %f87, %f680, %f680; |
| mov.f32 %f681, 0fB94D4153; |
| @%p69 bra LBB0_63; |
|
|
| fma.rn.f32 %f681, %f200, %f87, %f199; |
|
|
| LBB0_63: |
| selp.f32 %f449, 0f3C0885E4, 0f3D2AAABB, %p69; |
| fma.rn.f32 %f450, %f681, %f87, %f449; |
| selp.f32 %f451, 0fBE2AAAA8, 0fBEFFFFFF, %p69; |
| fma.rn.f32 %f452, %f450, %f87, %f451; |
| fma.rn.f32 %f454, %f87, %f86, %f192; |
| fma.rn.f32 %f682, %f452, %f454, %f86; |
| and.b32 %r459, %r107, 2; |
| setp.eq.s32 %p71, %r459, 0; |
| @%p71 bra LBB0_65; |
|
|
| mov.f32 %f456, 0fBF800000; |
| fma.rn.f32 %f682, %f682, %f456, %f192; |
|
|
| LBB0_65: |
| @%p61 bra LBB0_75; |
|
|
| setp.eq.f32 %p73, %f82, 0f7F800000; |
| @%p73 bra LBB0_74; |
| bra.uni LBB0_67; |
|
|
| LBB0_74: |
| mul.rn.f32 %f683, %f80, %f192; |
| bra.uni LBB0_75; |
|
|
| LBB0_67: |
| mov.b32 %r109, %f80; |
| bfe.u32 %r460, %r109, 23, 8; |
| add.s32 %r110, %r460, -128; |
| shl.b32 %r461, %r109, 8; |
| or.b32 %r111, %r461, -2147483648; |
| shr.u32 %r112, %r110, 5; |
| mov.u64 %rd296, 0; |
| mov.u64 %rd295, %rd1; |
| mov.u64 %rd297, %rd296; |
|
|
| LBB0_68: |
| .pragma "nounroll"; |
| shl.b64 %rd200, %rd296, 2; |
| mov.u64 %rd201, __cudart_i2opi_f; |
| add.s64 %rd202, %rd201, %rd200; |
| ld.global.nc.u32 %r462, [%rd202]; |
| mad.wide.u32 %rd203, %r462, %r111, %rd297; |
| shr.u64 %rd297, %rd203, 32; |
| st.local.u32 [%rd295], %rd203; |
| cvt.u32.u64 %r463, %rd296; |
| add.s32 %r464, %r463, 1; |
| cvt.u64.u32 %rd296, %r464; |
| mul.wide.u32 %rd204, %r463, 4; |
| add.s64 %rd205, %rd1, %rd204; |
| add.s64 %rd295, %rd205, 4; |
| setp.ne.s32 %p74, %r464, 6; |
| @%p74 bra LBB0_68; |
|
|
| and.b32 %r113, %r109, -2147483648; |
| st.local.u32 [%rd22], %rd297; |
| mul.wide.u32 %rd206, %r112, 4; |
| sub.s64 %rd65, %rd1, %rd206; |
| ld.local.u32 %r721, [%rd65+24]; |
| ld.local.u32 %r722, [%rd65+20]; |
| and.b32 %r116, %r110, 31; |
| setp.eq.s32 %p75, %r116, 0; |
| @%p75 bra LBB0_71; |
|
|
| mov.u32 %r465, 32; |
| sub.s32 %r466, %r465, %r116; |
| shr.u32 %r467, %r722, %r466; |
| shl.b32 %r468, %r721, %r116; |
| add.s32 %r721, %r467, %r468; |
| ld.local.u32 %r469, [%rd65+16]; |
| shr.u32 %r470, %r469, %r466; |
| shl.b32 %r471, %r722, %r116; |
| add.s32 %r722, %r470, %r471; |
|
|
| LBB0_71: |
| shr.u32 %r472, %r722, 30; |
| shl.b32 %r473, %r721, 2; |
| or.b32 %r724, %r472, %r473; |
| shl.b32 %r725, %r722, 2; |
| shr.u32 %r474, %r724, 31; |
| shr.u32 %r475, %r721, 30; |
| add.s32 %r123, %r474, %r475; |
| setp.eq.s32 %p76, %r474, 0; |
| mov.u32 %r723, %r113; |
| @%p76 bra LBB0_73; |
|
|
| not.b32 %r476, %r724; |
| neg.s32 %r124, %r725; |
| setp.eq.s32 %p77, %r725, 0; |
| selp.u32 %r477, 1, 0, %p77; |
| add.s32 %r724, %r477, %r476; |
| xor.b32 %r723, %r113, -2147483648; |
| mov.u32 %r725, %r124; |
|
|
| LBB0_73: |
| cvt.u64.u32 %rd207, %r724; |
| cvt.u64.u32 %rd208, %r725; |
| bfi.b64 %rd209, %rd207, %rd208, 32, 32; |
| cvt.rn.f64.s64 %fd9, %rd209; |
| mul.f64 %fd10, %fd9, 0d3BF921FB54442D19; |
| cvt.rn.f32.f64 %f457, %fd10; |
| neg.f32 %f458, %f457; |
| setp.eq.s32 %p78, %r723, 0; |
| selp.f32 %f683, %f457, %f458, %p78; |
| setp.eq.s32 %p79, %r113, 0; |
| neg.s32 %r478, %r123; |
| selp.b32 %r726, %r123, %r478, %p79; |
|
|
| LBB0_75: |
| mul.f32 %f96, %f79, %f682; |
| and.b32 %r132, %r726, 1; |
| setp.eq.s32 %p80, %r132, 0; |
| selp.f32 %f97, %f683, 0f3F800000, %p80; |
| mul.rn.f32 %f98, %f683, %f683; |
| mov.f32 %f684, 0fB94D4153; |
| @%p80 bra LBB0_77; |
|
|
| fma.rn.f32 %f684, %f200, %f98, %f199; |
|
|
| LBB0_77: |
| selp.f32 %f463, 0f3C0885E4, 0f3D2AAABB, %p80; |
| fma.rn.f32 %f464, %f684, %f98, %f463; |
| selp.f32 %f465, 0fBE2AAAA8, 0fBEFFFFFF, %p80; |
| fma.rn.f32 %f466, %f464, %f98, %f465; |
| fma.rn.f32 %f468, %f98, %f97, %f192; |
| fma.rn.f32 %f685, %f466, %f468, %f97; |
| and.b32 %r479, %r726, 2; |
| setp.eq.s32 %p82, %r479, 0; |
| @%p82 bra LBB0_79; |
|
|
| mov.f32 %f470, 0fBF800000; |
| fma.rn.f32 %f685, %f685, %f470, %f192; |
|
|
| LBB0_79: |
| abs.f32 %f471, %f679; |
| abs.f32 %f472, %f678; |
| setp.gt.f32 %p83, %f472, %f471; |
| neg.f32 %f473, %f677; |
| selp.f32 %f686, %f473, 0f00000000, %p83; |
| neg.f32 %f474, %f679; |
| selp.f32 %f687, %f678, %f474, %p83; |
| selp.f32 %f688, 0f00000000, %f677, %p83; |
| mul.f32 %f475, %f688, %f688; |
| fma.rn.f32 %f476, %f687, %f687, %f475; |
| fma.rn.f32 %f107, %f686, %f686, %f476; |
| setp.leu.f32 %p84, %f107, 0f00000000; |
| @%p84 bra LBB0_81; |
|
|
| sqrt.rn.f32 %f477, %f107; |
| div.rn.f32 %f686, %f686, %f477; |
| div.rn.f32 %f687, %f687, %f477; |
| div.rn.f32 %f688, %f688, %f477; |
|
|
| LBB0_81: |
| mov.u32 %r691, 255; |
| mov.f32 %f661, 0f5A0E1BCA; |
| mul.f32 %f487, %f79, %f685; |
| mul.f32 %f488, %f487, %f487; |
| mul.f32 %f489, %f96, %f96; |
| sub.f32 %f491, %f172, %f489; |
| sub.f32 %f492, %f491, %f488; |
| max.f32 %f493, %f192, %f492; |
| sqrt.rn.f32 %f494, %f493; |
| mul.f32 %f495, %f677, %f688; |
| mul.f32 %f496, %f679, %f687; |
| sub.f32 %f497, %f496, %f495; |
| mul.f32 %f498, %f679, %f686; |
| mul.f32 %f499, %f678, %f688; |
| sub.f32 %f500, %f499, %f498; |
| mul.f32 %f501, %f678, %f687; |
| mul.f32 %f502, %f677, %f686; |
| sub.f32 %f503, %f502, %f501; |
| mul.f32 %f504, %f96, %f497; |
| fma.rn.f32 %f505, %f487, %f686, %f504; |
| fma.rn.f32 %f114, %f678, %f494, %f505; |
| mul.f32 %f506, %f96, %f500; |
| fma.rn.f32 %f507, %f487, %f687, %f506; |
| fma.rn.f32 %f115, %f677, %f494, %f507; |
| mul.f32 %f508, %f96, %f503; |
| fma.rn.f32 %f509, %f487, %f688, %f508; |
| fma.rn.f32 %f116, %f679, %f494, %f509; |
| mul.lo.s64 %rd211, %rd49, 6364136223846793005; |
| add.s64 %rd310, %rd211, -2720673578348880933; |
| // begin inline asm |
| call(%r480,%r481,%r482,%r483,%r484,%r485,%r486,%r487,%r488,%r489,%r490,%r491,%r492,%r493,%r494,%r495,%r496,%r497,%r498,%r499,%r500,%r501,%r502,%r503,%r504,%r505,%r506,%r507,%r508,%r509,%r510,%r511),_optix_trace_typed_32,(%r298,%rd18,%f76,%f77,%f78,%f114,%f115,%f116,%f192,%f661,%f192,%r691,%r266,%r298,%r266,%r298,%r266,%r356,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298); |
| // end inline asm |
| setp.eq.s32 %p85, %r480, -1; |
| @%p85 bra LBB0_124; |
|
|
| mul.wide.u32 %rd212, %r480, 36; |
| add.s64 %rd213, %rd19, %rd212; |
| ld.global.f32 %f510, [%rd213]; |
| ld.global.f32 %f511, [%rd213+12]; |
| sub.f32 %f512, %f511, %f510; |
| ld.global.f32 %f513, [%rd213+4]; |
| ld.global.f32 %f514, [%rd213+16]; |
| sub.f32 %f515, %f514, %f513; |
| ld.global.f32 %f516, [%rd213+8]; |
| ld.global.f32 %f517, [%rd213+20]; |
| sub.f32 %f518, %f517, %f516; |
| ld.global.f32 %f519, [%rd213+24]; |
| sub.f32 %f520, %f519, %f510; |
| ld.global.f32 %f521, [%rd213+28]; |
| sub.f32 %f522, %f521, %f513; |
| ld.global.f32 %f523, [%rd213+32]; |
| sub.f32 %f524, %f523, %f516; |
| sub.f32 %f525, %f76, %f510; |
| sub.f32 %f526, %f77, %f513; |
| sub.f32 %f527, %f78, %f516; |
| mul.f32 %f528, %f515, %f524; |
| mul.f32 %f529, %f518, %f522; |
| sub.f32 %f530, %f528, %f529; |
| mul.f32 %f531, %f518, %f520; |
| mul.f32 %f532, %f512, %f524; |
| sub.f32 %f533, %f531, %f532; |
| mul.f32 %f534, %f512, %f522; |
| mul.f32 %f535, %f515, %f520; |
| sub.f32 %f536, %f534, %f535; |
| mul.f32 %f537, %f116, %f526; |
| mul.f32 %f538, %f115, %f527; |
| sub.f32 %f539, %f537, %f538; |
| mul.f32 %f540, %f114, %f527; |
| mul.f32 %f541, %f116, %f525; |
| sub.f32 %f542, %f540, %f541; |
| mul.f32 %f543, %f115, %f525; |
| mul.f32 %f544, %f114, %f526; |
| sub.f32 %f545, %f543, %f544; |
| mul.f32 %f546, %f114, %f530; |
| mul.f32 %f547, %f115, %f533; |
| mul.f32 %f548, %f116, %f536; |
| add.f32 %f549, %f548, %f547; |
| add.f32 %f550, %f546, %f549; |
| rcp.rn.f32 %f551, %f550; |
| mul.f32 %f552, %f524, %f545; |
| fma.rn.f32 %f553, %f522, %f542, %f552; |
| fma.rn.f32 %f554, %f520, %f539, %f553; |
| mul.f32 %f555, %f551, %f554; |
| mul.f32 %f556, %f518, %f545; |
| fma.rn.f32 %f557, %f515, %f542, %f556; |
| fma.rn.f32 %f558, %f512, %f539, %f557; |
| mul.f32 %f559, %f551, %f558; |
| mul.f32 %f560, %f533, %f526; |
| fma.rn.f32 %f561, %f536, %f527, %f560; |
| fma.rn.f32 %f562, %f525, %f530, %f561; |
| mul.f32 %f117, %f562, %f551; |
| setp.gt.f32 %p86, %f555, 0f80000000; |
| setp.lt.f32 %p87, %f555, 0fBF800000; |
| or.pred %p88, %p86, %p87; |
| setp.lt.f32 %p89, %f559, 0f00000000; |
| or.pred %p90, %p89, %p88; |
| sub.f32 %f563, %f559, %f555; |
| setp.gt.f32 %p91, %f563, 0f3F800000; |
| or.pred %p3, %p91, %p90; |
| neg.f32 %f564, %f548; |
| sub.f32 %f565, %f564, %f547; |
| sub.f32 %f566, %f565, %f546; |
| mov.b32 %r551, %f566; |
| and.b32 %r552, %r551, -2147483648; |
| or.b32 %r553, %r552, 1065353216; |
| mov.b32 %f567, %r553; |
| mul.f32 %f690, %f530, %f567; |
| mul.f32 %f689, %f533, %f567; |
| mul.f32 %f691, %f536, %f567; |
| mul.f32 %f568, %f691, %f691; |
| fma.rn.f32 %f569, %f689, %f689, %f568; |
| fma.rn.f32 %f121, %f690, %f690, %f569; |
| setp.leu.f32 %p92, %f121, 0f00000000; |
| @%p92 bra LBB0_84; |
|
|
| sqrt.rn.f32 %f570, %f121; |
| div.rn.f32 %f690, %f690, %f570; |
| div.rn.f32 %f689, %f689, %f570; |
| div.rn.f32 %f691, %f691, %f570; |
|
|
| LBB0_84: |
| mov.f32 %f662, 0fBA83126F; |
| sub.f32 %f572, %f662, %f117; |
| setp.gt.f32 %p93, %f117, 0f80000000; |
| selp.f32 %f573, 0f7F7FFFFF, %f572, %p3; |
| selp.f32 %f574, 0f7F7FFFFF, %f573, %p93; |
| max.f32 %f576, %f192, %f574; |
| fma.rn.f32 %f128, %f114, %f576, %f76; |
| fma.rn.f32 %f129, %f115, %f576, %f77; |
| fma.rn.f32 %f130, %f116, %f576, %f78; |
| mul.lo.s64 %rd214, %rd310, 6364136223846793005; |
| add.s64 %rd67, %rd214, -2720673578348880933; |
| shr.u64 %rd215, %rd310, 18; |
| xor.b64 %rd216, %rd215, %rd310; |
| shr.u64 %rd217, %rd216, 27; |
| cvt.u32.u64 %r554, %rd217; |
| shr.u64 %rd218, %rd310, 59; |
| cvt.u32.u64 %r555, %rd218; |
| shf.r.wrap.b32 %r556, %r554, %r554, %r555; |
| shr.u32 %r557, %r556, 9; |
| or.b32 %r558, %r557, 1065353216; |
| mov.b32 %f577, %r558; |
| add.f32 %f578, %f577, 0fBF800000; |
| shr.u64 %rd219, %rd67, 18; |
| xor.b64 %rd220, %rd219, %rd67; |
| shr.u64 %rd221, %rd220, 27; |
| cvt.u32.u64 %r559, %rd221; |
| shr.u64 %rd222, %rd67, 59; |
| cvt.u32.u64 %r560, %rd222; |
| shf.r.wrap.b32 %r561, %r559, %r559, %r560; |
| shr.u32 %r562, %r561, 9; |
| or.b32 %r563, %r562, 1065353216; |
| mov.b32 %f579, %r563; |
| add.f32 %f580, %f579, 0fBF800000; |
| sqrt.rn.f32 %f131, %f578; |
| mul.f32 %f132, %f580, 0f40C90FDB; |
| mul.f32 %f581, %f132, 0f3F22F983; |
| cvt.rni.s32.f32 %r738, %f581; |
| cvt.rn.f32.s32 %f582, %r738; |
| fma.rn.f32 %f584, %f582, %f176, %f132; |
| fma.rn.f32 %f586, %f582, %f178, %f584; |
| fma.rn.f32 %f695, %f582, %f180, %f586; |
| abs.f32 %f134, %f132; |
| setp.leu.f32 %p94, %f134, 0f47CE4780; |
| mov.u32 %r732, %r738; |
| mov.f32 %f692, %f695; |
| @%p94 bra LBB0_94; |
|
|
| setp.eq.f32 %p95, %f134, 0f7F800000; |
| @%p95 bra LBB0_93; |
| bra.uni LBB0_86; |
|
|
| LBB0_93: |
| mul.rn.f32 %f692, %f132, %f192; |
| mov.u32 %r732, %r738; |
| bra.uni LBB0_94; |
|
|
| LBB0_86: |
| mov.b32 %r135, %f132; |
| bfe.u32 %r564, %r135, 23, 8; |
| add.s32 %r136, %r564, -128; |
| shl.b32 %r565, %r135, 8; |
| or.b32 %r137, %r565, -2147483648; |
| shr.u32 %r138, %r136, 5; |
| mov.u64 %rd299, 0; |
| mov.u64 %rd298, %rd1; |
| mov.u64 %rd300, %rd299; |
|
|
| LBB0_87: |
| .pragma "nounroll"; |
| shl.b64 %rd225, %rd299, 2; |
| mov.u64 %rd226, __cudart_i2opi_f; |
| add.s64 %rd227, %rd226, %rd225; |
| ld.global.nc.u32 %r566, [%rd227]; |
| mad.wide.u32 %rd228, %r566, %r137, %rd300; |
| shr.u64 %rd300, %rd228, 32; |
| st.local.u32 [%rd298], %rd228; |
| cvt.u32.u64 %r567, %rd299; |
| add.s32 %r568, %r567, 1; |
| cvt.u64.u32 %rd299, %r568; |
| mul.wide.u32 %rd229, %r567, 4; |
| add.s64 %rd230, %rd1, %rd229; |
| add.s64 %rd298, %rd230, 4; |
| setp.ne.s32 %p96, %r568, 6; |
| @%p96 bra LBB0_87; |
|
|
| and.b32 %r139, %r135, -2147483648; |
| st.local.u32 [%rd22], %rd300; |
| mul.wide.u32 %rd231, %r138, 4; |
| sub.s64 %rd75, %rd1, %rd231; |
| ld.local.u32 %r727, [%rd75+24]; |
| ld.local.u32 %r728, [%rd75+20]; |
| and.b32 %r142, %r136, 31; |
| setp.eq.s32 %p97, %r142, 0; |
| @%p97 bra LBB0_90; |
|
|
| mov.u32 %r569, 32; |
| sub.s32 %r570, %r569, %r142; |
| shr.u32 %r571, %r728, %r570; |
| shl.b32 %r572, %r727, %r142; |
| add.s32 %r727, %r571, %r572; |
| ld.local.u32 %r573, [%rd75+16]; |
| shr.u32 %r574, %r573, %r570; |
| shl.b32 %r575, %r728, %r142; |
| add.s32 %r728, %r574, %r575; |
|
|
| LBB0_90: |
| shr.u32 %r576, %r728, 30; |
| shl.b32 %r577, %r727, 2; |
| or.b32 %r730, %r576, %r577; |
| shl.b32 %r731, %r728, 2; |
| shr.u32 %r578, %r730, 31; |
| shr.u32 %r579, %r727, 30; |
| add.s32 %r149, %r578, %r579; |
| setp.eq.s32 %p98, %r578, 0; |
| mov.u32 %r729, %r139; |
| @%p98 bra LBB0_92; |
|
|
| not.b32 %r580, %r730; |
| neg.s32 %r150, %r731; |
| setp.eq.s32 %p99, %r731, 0; |
| selp.u32 %r581, 1, 0, %p99; |
| add.s32 %r730, %r581, %r580; |
| xor.b32 %r729, %r139, -2147483648; |
| mov.u32 %r731, %r150; |
|
|
| LBB0_92: |
| cvt.u64.u32 %rd232, %r730; |
| cvt.u64.u32 %rd233, %r731; |
| bfi.b64 %rd234, %rd232, %rd233, 32, 32; |
| cvt.rn.f64.s64 %fd11, %rd234; |
| mul.f64 %fd12, %fd11, 0d3BF921FB54442D19; |
| cvt.rn.f32.f64 %f588, %fd12; |
| neg.f32 %f589, %f588; |
| setp.eq.s32 %p100, %r729, 0; |
| selp.f32 %f692, %f588, %f589, %p100; |
| setp.eq.s32 %p101, %r139, 0; |
| neg.s32 %r582, %r149; |
| selp.b32 %r732, %r149, %r582, %p101; |
|
|
| LBB0_94: |
| add.s32 %r158, %r732, 1; |
| and.b32 %r159, %r158, 1; |
| setp.eq.s32 %p102, %r159, 0; |
| selp.f32 %f138, %f692, 0f3F800000, %p102; |
| mul.rn.f32 %f139, %f692, %f692; |
| mov.f32 %f693, 0fB94D4153; |
| @%p102 bra LBB0_96; |
|
|
| fma.rn.f32 %f693, %f200, %f139, %f199; |
|
|
| LBB0_96: |
| selp.f32 %f594, 0f3C0885E4, 0f3D2AAABB, %p102; |
| fma.rn.f32 %f595, %f693, %f139, %f594; |
| selp.f32 %f596, 0fBE2AAAA8, 0fBEFFFFFF, %p102; |
| fma.rn.f32 %f597, %f595, %f139, %f596; |
| fma.rn.f32 %f599, %f139, %f138, %f192; |
| fma.rn.f32 %f694, %f597, %f599, %f138; |
| and.b32 %r583, %r158, 2; |
| setp.eq.s32 %p104, %r583, 0; |
| @%p104 bra LBB0_98; |
|
|
| mov.f32 %f601, 0fBF800000; |
| fma.rn.f32 %f694, %f694, %f601, %f192; |
|
|
| LBB0_98: |
| @%p94 bra LBB0_108; |
|
|
| setp.eq.f32 %p106, %f134, 0f7F800000; |
| @%p106 bra LBB0_107; |
| bra.uni LBB0_100; |
|
|
| LBB0_107: |
| mul.rn.f32 %f695, %f132, %f192; |
| bra.uni LBB0_108; |
|
|
| LBB0_100: |
| mov.b32 %r160, %f132; |
| bfe.u32 %r584, %r160, 23, 8; |
| add.s32 %r161, %r584, -128; |
| shl.b32 %r585, %r160, 8; |
| or.b32 %r162, %r585, -2147483648; |
| shr.u32 %r163, %r161, 5; |
| mov.u64 %rd302, 0; |
| mov.u64 %rd301, %rd1; |
| mov.u64 %rd303, %rd302; |
|
|
| LBB0_101: |
| .pragma "nounroll"; |
| shl.b64 %rd237, %rd302, 2; |
| mov.u64 %rd238, __cudart_i2opi_f; |
| add.s64 %rd239, %rd238, %rd237; |
| ld.global.nc.u32 %r586, [%rd239]; |
| mad.wide.u32 %rd240, %r586, %r162, %rd303; |
| shr.u64 %rd303, %rd240, 32; |
| st.local.u32 [%rd301], %rd240; |
| cvt.u32.u64 %r587, %rd302; |
| add.s32 %r588, %r587, 1; |
| cvt.u64.u32 %rd302, %r588; |
| mul.wide.u32 %rd241, %r587, 4; |
| add.s64 %rd242, %rd1, %rd241; |
| add.s64 %rd301, %rd242, 4; |
| setp.ne.s32 %p107, %r588, 6; |
| @%p107 bra LBB0_101; |
|
|
| and.b32 %r164, %r160, -2147483648; |
| st.local.u32 [%rd22], %rd303; |
| mul.wide.u32 %rd243, %r163, 4; |
| sub.s64 %rd83, %rd1, %rd243; |
| ld.local.u32 %r733, [%rd83+24]; |
| ld.local.u32 %r734, [%rd83+20]; |
| and.b32 %r167, %r161, 31; |
| setp.eq.s32 %p108, %r167, 0; |
| @%p108 bra LBB0_104; |
|
|
| mov.u32 %r589, 32; |
| sub.s32 %r590, %r589, %r167; |
| shr.u32 %r591, %r734, %r590; |
| shl.b32 %r592, %r733, %r167; |
| add.s32 %r733, %r591, %r592; |
| ld.local.u32 %r593, [%rd83+16]; |
| shr.u32 %r594, %r593, %r590; |
| shl.b32 %r595, %r734, %r167; |
| add.s32 %r734, %r594, %r595; |
|
|
| LBB0_104: |
| shr.u32 %r596, %r734, 30; |
| shl.b32 %r597, %r733, 2; |
| or.b32 %r736, %r596, %r597; |
| shl.b32 %r737, %r734, 2; |
| shr.u32 %r598, %r736, 31; |
| shr.u32 %r599, %r733, 30; |
| add.s32 %r174, %r598, %r599; |
| setp.eq.s32 %p109, %r598, 0; |
| mov.u32 %r735, %r164; |
| @%p109 bra LBB0_106; |
|
|
| not.b32 %r600, %r736; |
| neg.s32 %r175, %r737; |
| setp.eq.s32 %p110, %r737, 0; |
| selp.u32 %r601, 1, 0, %p110; |
| add.s32 %r736, %r601, %r600; |
| xor.b32 %r735, %r164, -2147483648; |
| mov.u32 %r737, %r175; |
|
|
| LBB0_106: |
| cvt.u64.u32 %rd244, %r736; |
| cvt.u64.u32 %rd245, %r737; |
| bfi.b64 %rd246, %rd244, %rd245, 32, 32; |
| cvt.rn.f64.s64 %fd13, %rd246; |
| mul.f64 %fd14, %fd13, 0d3BF921FB54442D19; |
| cvt.rn.f32.f64 %f602, %fd14; |
| neg.f32 %f603, %f602; |
| setp.eq.s32 %p111, %r735, 0; |
| selp.f32 %f695, %f602, %f603, %p111; |
| setp.eq.s32 %p112, %r164, 0; |
| neg.s32 %r602, %r174; |
| selp.b32 %r738, %r174, %r602, %p112; |
|
|
| LBB0_108: |
| mul.f32 %f148, %f131, %f694; |
| and.b32 %r183, %r738, 1; |
| setp.eq.s32 %p113, %r183, 0; |
| selp.f32 %f149, %f695, 0f3F800000, %p113; |
| mul.rn.f32 %f150, %f695, %f695; |
| mov.f32 %f696, 0fB94D4153; |
| @%p113 bra LBB0_110; |
|
|
| fma.rn.f32 %f696, %f200, %f150, %f199; |
|
|
| LBB0_110: |
| selp.f32 %f608, 0f3C0885E4, 0f3D2AAABB, %p113; |
| fma.rn.f32 %f609, %f696, %f150, %f608; |
| selp.f32 %f610, 0fBE2AAAA8, 0fBEFFFFFF, %p113; |
| fma.rn.f32 %f611, %f609, %f150, %f610; |
| fma.rn.f32 %f613, %f150, %f149, %f192; |
| fma.rn.f32 %f697, %f611, %f613, %f149; |
| and.b32 %r603, %r738, 2; |
| setp.eq.s32 %p115, %r603, 0; |
| @%p115 bra LBB0_112; |
|
|
| mov.f32 %f615, 0fBF800000; |
| fma.rn.f32 %f697, %f697, %f615, %f192; |
|
|
| LBB0_112: |
| abs.f32 %f616, %f691; |
| abs.f32 %f617, %f690; |
| setp.gt.f32 %p116, %f617, %f616; |
| neg.f32 %f618, %f689; |
| selp.f32 %f698, %f618, 0f00000000, %p116; |
| neg.f32 %f619, %f691; |
| selp.f32 %f699, %f690, %f619, %p116; |
| selp.f32 %f700, 0f00000000, %f689, %p116; |
| mul.f32 %f620, %f700, %f700; |
| fma.rn.f32 %f621, %f699, %f699, %f620; |
| fma.rn.f32 %f159, %f698, %f698, %f621; |
| setp.leu.f32 %p117, %f159, 0f00000000; |
| @%p117 bra LBB0_114; |
|
|
| sqrt.rn.f32 %f622, %f159; |
| div.rn.f32 %f698, %f698, %f622; |
| div.rn.f32 %f699, %f699, %f622; |
| div.rn.f32 %f700, %f700, %f622; |
|
|
| LBB0_114: |
| mov.u32 %r692, 255; |
| mov.f32 %f663, 0f5A0E1BCA; |
| mul.f32 %f632, %f131, %f697; |
| mul.f32 %f633, %f632, %f632; |
| mul.f32 %f634, %f148, %f148; |
| sub.f32 %f636, %f172, %f634; |
| sub.f32 %f637, %f636, %f633; |
| max.f32 %f638, %f192, %f637; |
| sqrt.rn.f32 %f639, %f638; |
| mul.f32 %f640, %f689, %f700; |
| mul.f32 %f641, %f691, %f699; |
| sub.f32 %f642, %f641, %f640; |
| mul.f32 %f643, %f691, %f698; |
| mul.f32 %f644, %f690, %f700; |
| sub.f32 %f645, %f644, %f643; |
| mul.f32 %f646, %f690, %f699; |
| mul.f32 %f647, %f689, %f698; |
| sub.f32 %f648, %f647, %f646; |
| mul.f32 %f649, %f148, %f642; |
| fma.rn.f32 %f650, %f632, %f698, %f649; |
| fma.rn.f32 %f626, %f690, %f639, %f650; |
| mul.f32 %f651, %f148, %f645; |
| fma.rn.f32 %f652, %f632, %f699, %f651; |
| fma.rn.f32 %f627, %f689, %f639, %f652; |
| mul.f32 %f653, %f148, %f648; |
| fma.rn.f32 %f654, %f632, %f700, %f653; |
| fma.rn.f32 %f628, %f691, %f639, %f654; |
| mul.lo.s64 %rd248, %rd67, 6364136223846793005; |
| add.s64 %rd310, %rd248, -2720673578348880933; |
| // begin inline asm |
| call(%r695,%r605,%r606,%r607,%r608,%r609,%r610,%r611,%r612,%r613,%r614,%r615,%r616,%r617,%r618,%r619,%r620,%r621,%r622,%r623,%r624,%r625,%r626,%r627,%r628,%r629,%r630,%r631,%r632,%r633,%r634,%r635),_optix_trace_typed_32,(%r298,%rd18,%f128,%f129,%f130,%f626,%f627,%f628,%f192,%f663,%f192,%r692,%r266,%r298,%r266,%r298,%r266,%r480,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298,%r298); |
| // end inline asm |
| setp.eq.s32 %p118, %r695, -1; |
| @%p118 bra LBB0_124; |
|
|
| mul.lo.s64 %rd249, %rd310, 6364136223846793005; |
| add.s64 %rd85, %rd249, -2720673578348880933; |
| shr.u64 %rd250, %rd85, 18; |
| xor.b64 %rd251, %rd250, %rd85; |
| shr.u64 %rd252, %rd251, 27; |
| cvt.u32.u64 %r675, %rd252; |
| shr.u64 %rd253, %rd85, 59; |
| cvt.u32.u64 %r676, %rd253; |
| shf.r.wrap.b32 %r677, %r675, %r675, %r676; |
| shr.u32 %r678, %r677, 9; |
| or.b32 %r679, %r678, 1065353216; |
| mov.b32 %f655, %r679; |
| add.f32 %f656, %f655, 0fBF800000; |
| mul.f32 %f166, %f656, 0f40C90FDB; |
| abs.f32 %f657, %f166; |
| setp.leu.f32 %p119, %f657, 0f47CE4780; |
| setp.eq.f32 %p120, %f657, 0f7F800000; |
| or.pred %p4, %p120, %p119; |
| @%p4 bra LBB0_119; |
|
|
| mov.b32 %r680, %f166; |
| shl.b32 %r681, %r680, 8; |
| or.b32 %r185, %r681, -2147483648; |
| mov.u64 %rd305, 0; |
| mov.u64 %rd304, %rd1; |
| mov.u64 %rd306, %rd305; |
|
|
| LBB0_117: |
| .pragma "nounroll"; |
| shl.b64 %rd256, %rd305, 2; |
| mov.u64 %rd257, __cudart_i2opi_f; |
| add.s64 %rd258, %rd257, %rd256; |
| ld.global.nc.u32 %r682, [%rd258]; |
| mad.wide.u32 %rd259, %r682, %r185, %rd306; |
| shr.u64 %rd306, %rd259, 32; |
| st.local.u32 [%rd304], %rd259; |
| cvt.u32.u64 %r683, %rd305; |
| add.s32 %r684, %r683, 1; |
| cvt.u64.u32 %rd305, %r684; |
| mul.wide.u32 %rd260, %r683, 4; |
| add.s64 %rd261, %rd1, %rd260; |
| add.s64 %rd304, %rd261, 4; |
| setp.ne.s32 %p121, %r684, 6; |
| @%p121 bra LBB0_117; |
|
|
| st.local.u32 [%rd22], %rd306; |
|
|
| LBB0_119: |
| @%p4 bra LBB0_123; |
|
|
| mov.b32 %r685, %f166; |
| shl.b32 %r686, %r685, 8; |
| or.b32 %r186, %r686, -2147483648; |
| mov.u64 %rd308, 0; |
| mov.u64 %rd307, %rd1; |
| mov.u64 %rd309, %rd308; |
|
|
| LBB0_121: |
| .pragma "nounroll"; |
| shl.b64 %rd264, %rd308, 2; |
| mov.u64 %rd265, __cudart_i2opi_f; |
| add.s64 %rd266, %rd265, %rd264; |
| ld.global.nc.u32 %r687, [%rd266]; |
| mad.wide.u32 %rd267, %r687, %r186, %rd309; |
| shr.u64 %rd309, %rd267, 32; |
| st.local.u32 [%rd307], %rd267; |
| cvt.u32.u64 %r688, %rd308; |
| add.s32 %r689, %r688, 1; |
| cvt.u64.u32 %rd308, %r689; |
| mul.wide.u32 %rd268, %r688, 4; |
| add.s64 %rd269, %rd1, %rd268; |
| add.s64 %rd307, %rd269, 4; |
| setp.ne.s32 %p122, %r689, 6; |
| @%p122 bra LBB0_121; |
|
|
| st.local.u32 [%rd22], %rd309; |
|
|
| LBB0_123: |
| mul.lo.s64 %rd270, %rd85, 6364136223846793005; |
| add.s64 %rd310, %rd270, -2720673578348880933; |
| bra.uni LBB0_125; |
|
|
| LBB0_124: |
| add.s32 %r694, %r694, 1; |
| setp.gt.u32 %p123, %r694, 2; |
| mov.u32 %r695, -1; |
| @%p123 bra LBB0_127; |
|
|
| LBB0_125: |
| add.s32 %r693, %r693, 1; |
| setp.lt.u32 %p124, %r693, 32; |
| @%p124 bra LBB0_5; |
|
|
| ld.const.u64 %rd271, [params+16]; |
| cvta.to.global.u64 %rd272, %rd271; |
| shl.b64 %rd273, %rd2, 2; |
| add.s64 %rd274, %rd272, %rd273; |
| ld.global.f32 %f658, [%rd274]; |
| neg.f32 %f659, %f658; |
| st.global.f32 [%rd274], %f659; |
|
|
| LBB0_127: |
| ret; |
|
|
| } |
| // .globl __miss__ms |
| .visible .entry __miss__ms() |
| { |
| // Miss entry point: writes the sentinel -1 into payload slot 0. |
| // The ray-generation code in this file compares the first payload value |
| // returned by its trace calls against -1 to detect that nothing was hit. |
| .reg .b32 %payload_slot; |
| .reg .b32 %no_hit; |
|
|
| // Sentinel value and destination slot are independent constants. |
| mov.u32 %no_hit, -1; |
| mov.u32 %payload_slot, 0; |
| // begin inline asm |
| call _optix_set_payload, (%payload_slot, %no_hit); |
| // end inline asm |
| ret; |
|
|
| } |
| // .globl __closesthit__ch |
| .visible .entry __closesthit__ch() |
| { |
| // Closest-hit entry point: writes the index of the hit primitive into |
| // payload slot 0. The ray-generation code in this file multiplies that |
| // value by 36 bytes to address the per-primitive vertex data. |
| .reg .b32 %payload_slot; |
| .reg .b32 %prim_idx; |
|
|
| // Destination slot is a constant, so it can be set up before the query. |
| mov.u32 %payload_slot, 0; |
| // begin inline asm |
| call (%prim_idx), _optix_read_primitive_idx, (); |
| // end inline asm |
| // begin inline asm |
| call _optix_set_payload, (%payload_slot, %prim_idx); |
| // end inline asm |
| ret; |
|
|
| } |
|
|
|
|