//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-34097967
// Cuda compilation tools, release 12.4, V12.4.131
// Based on NVVM 7.0.1
//

// NOTE: PTX `//` comments extend to the next newline, so every directive and
// instruction below must stay on its own line; joining lines would comment
// out the module directives and part of the constant table.

.version 8.4
.target sm_52
.address_size 64

	// .globl	huffman_decode

// ---------------------------------------------------------------------------
// shared_mem_constants: 1280 bytes of decode tables, copied verbatim into
// shared_mem[0..1279] at kernel start.  As used by huffman_decode:
//   [   0,  256)  indexed by bits 63..56 of the 64-bit bit window
//   [ 256,  512)  indexed by bits 55..48, consulted when the previous hit is 0
//   [ 512,  768)  indexed by bits 47..40, consulted when the previous hit is 0
//   [ 768, 1024)  indexed by bits 39..32, consulted when the previous hit is 0
//   [1024, 1280)  indexed by the decoded symbol; the value is the number of
//                 bits the window is shifted left after that symbol
// A non-zero byte in one of the first four tables is taken as the symbol.
// The initializer list is shorter than the array; per the PTX rules the
// remaining elements are zero.
// ---------------------------------------------------------------------------
.const .align 1 .b8 shared_mem_constants[1280] = {
	// [0, 256): lookup on window bits 63..56
	120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120,
	120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120,
	116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
	116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
	115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
	114, 114, 114, 114, 114, 114, 114, 114, 113, 113, 113, 113, 112, 112, 111, 0,
	117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117,
	117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117,
	118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
	118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
	118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
	118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
	119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
	119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
	119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
	119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
	// [256, 512): lookup on window bits 55..48
	109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
	109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
	109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
	109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
	108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
	108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
	107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107,
	106, 106, 106, 106, 106, 106, 106, 106, 104, 104, 103, 0, 122, 122, 105, 105,
	121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
	121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
	121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
	121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
	110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
	110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
	110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
	110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
	// [512, 768): lookup on window bits 47..40
	123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
	123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
	123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
	123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
	123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
	123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
	123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
	123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
	102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
	102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
	102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
	102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
	101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,
	101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,
	100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
	99, 99, 99, 99, 99, 99, 99, 99, 98, 98, 98, 98, 97, 97, 124, 0,
	// [768, 1024): lookup on window bits 39..32 (result used without a zero check)
	91, 91, 92, 92, 125, 125, 126, 126, 0, 81, 84, 85, 87, 88, 89, 90,
	93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93,
	94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
	94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
	95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
	95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
	95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
	95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
	96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
	96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
	96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
	96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
	96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
	96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
	96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
	96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
	// [1024, ...): per-symbol shift amount in bits, indexed by symbol value
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 32, 0, 0, 32, 32, 0, 32, 32, 32, 32, 31, 31, 28, 27, 26,
	25, 23, 22, 21, 20, 19, 18, 16, 15, 15, 13, 12, 11, 10, 10, 8,
	7, 6, 5, 4, 3, 3, 2, 2, 3, 10, 15, 17, 24, 31, 31};

// Dynamically sized shared memory; the layout the kernel imposes on it is
// described in the kernel header below.
.extern .shared .align 16 .b8 shared_mem[];

// ---------------------------------------------------------------------------
// huffman_decode
//
// Table-driven prefix-code decoder.  Each thread decodes the symbols that
// start inside its own fixed-size slice of the input bit stream; a block-wide
// prefix sum over the per-thread symbol counts gives every thread its output
// position, after which the slice is decoded a second time and the results
// are written out.
//
// Parameters.  The PTX carries no names; the roles below are read off the
// instructions that consume each parameter -- confirm against the CUDA source.
//   param_0 (u64)  global byte pointer, read with ld.global.nc: input bytes.
//                  Global thread g = ctaid.x * ntid.x + tid.x starts at byte
//                  g * param_5.
//   param_1 (u64)  global byte pointer, read with ld.global.nc: one byte per
//                  output element, indexed by absolute output position.
//   param_2 (u64)  global u32 pointer indexed by ctaid.x: first output
//                  position of this block.
//   param_3 (u64)  global byte pointer: packed 5-bit fields, field g starts at
//                  bit 5*g (MSB first); the field is the initial left shift
//                  applied to thread g's bit window.
//   param_4 (u64)  global u16 pointer: decoded output.
//   param_5 (u32)  input bytes per thread (slice length).
//   param_6 (u32)  bound on the input byte index used when staging input.
//   param_7 (u32)  bound on the output position (decode and write-back).
//
// Shared-memory layout, with B = ntid.x * param_5:
//   [0, 1280)                    copy of shared_mem_constants
//   [1280, 1280 + B + 4)         staged input bytes (the last thread of the
//                                block stages 4 bytes beyond its slice)
//   [1284 + B, +4*(ntid.x + 1))  u32 slots: slot[t] for thread t, slot[ntid.x]
//                                holds the block base, later the block end
//   [1288 + B + 4*ntid.x, ...)   u16 outputs staged before the global store
//
// NOTE(review): the prefix sum only visits strides 2, 4, 8, ... so it looks
// like it expects ntid.x to be a power of two -- confirm with the launch code.
// ---------------------------------------------------------------------------
.visible .entry huffman_decode(
	.param .u64 huffman_decode_param_0,
	.param .u64 huffman_decode_param_1,
	.param .u64 huffman_decode_param_2,
	.param .u64 huffman_decode_param_3,
	.param .u64 huffman_decode_param_4,
	.param .u32 huffman_decode_param_5,
	.param .u32 huffman_decode_param_6,
	.param .u32 huffman_decode_param_7
)
{
	.reg .pred %p<33>;
	.reg .b16 %rs<48>;
	.reg .b32 %r<253>;
	.reg .b64 %rd<102>;

	// ---- Parameter loads and per-thread indices --------------------------
	ld.param.u64 %rd26, [huffman_decode_param_0];
	ld.param.u64 %rd22, [huffman_decode_param_1];
	ld.param.u64 %rd23, [huffman_decode_param_2];
	ld.param.u64 %rd24, [huffman_decode_param_3];
	ld.param.u64 %rd25, [huffman_decode_param_4];
	ld.param.u32 %r65, [huffman_decode_param_5];
	ld.param.u32 %r66, [huffman_decode_param_6];
	ld.param.u32 %r67, [huffman_decode_param_7];
	cvta.to.global.u64 %rd1, %rd26;
	mov.u32 %r1, %ntid.x;
	mul.lo.s32 %r2, %r1, %r65;              // r2 = B = ntid.x * param_5
	mov.u32 %r68, shared_mem;               // r68 = base address of shared_mem
	add.s32 %r3, %r68, %r2;                 // r3 = shared_mem + B
	shl.b32 %r69, %r1, 2;                   // r69 = 4 * ntid.x
	add.s32 %r4, %r2, %r69;                 // r4 = B + 4 * ntid.x
	mov.u32 %r5, %ctaid.x;
	mov.u32 %r252, %tid.x;
	mad.lo.s32 %r7, %r5, %r1, %r252;        // r7 = global thread index g
	mul.lo.s32 %r8, %r252, %r65;            // r8 = tid.x * param_5 (slice offset in block)

	// ---- Phase 1: copy the 1280 constant bytes into shared_mem[0..1279] --
	// Each thread copies indices tid.x, tid.x + ntid.x, ... below 1280.
	setp.gt.s32 %p1, %r252, 1279;
	@%p1 bra $L__BB0_3;

	mov.u32 %r235, %r252;

$L__BB0_2:
	cvt.s64.s32 %rd27, %r235;
	mov.u64 %rd28, shared_mem_constants;
	add.s64 %rd29, %rd28, %rd27;
	ld.const.u8 %rs19, [%rd29];
	add.s32 %r71, %r68, %r235;
	st.volatile.shared.u8 [%r71], %rs19;
	add.s32 %r235, %r235, %r1;
	setp.lt.s32 %p2, %r235, 1280;
	@%p2 bra $L__BB0_2;

	// ---- Phase 2: stage this thread's input bytes into shared memory -----
$L__BB0_3:
	add.s32 %r11, %r1, -1;
	setp.eq.s32 %p3, %r252, %r11;           // p3 = (tid.x == ntid.x - 1), reused later
	add.s32 %r72, %r65, 4;
	selp.b32 %r12, %r72, %r65, %p3;         // last thread copies param_5 + 4 bytes
	mul.lo.s32 %r13, %r7, %r65;             // r13 = g * param_5 (global byte offset)
	sub.s32 %r73, %r66, %r13;
	min.s32 %r14, %r12, %r73;               // r14 = n = bytes to copy, clipped by param_6
	setp.gt.s32 %p4, %r14, 0;
	@%p4 bra $L__BB0_4;
	bra.uni $L__BB0_10;

$L__BB0_4:
	// r80 = -2 - max(r13 + ~param_6, ~r12) = n - 1; the 4x-unrolled loop is
	// skipped when n < 4.  r240 = n & 3 is the remainder loop trip count.
	not.b32 %r75, %r66;
	add.s32 %r76, %r13, %r75;
	not.b32 %r77, %r12;
	max.s32 %r78, %r76, %r77;
	mov.u32 %r79, -2;
	sub.s32 %r80, %r79, %r78;
	and.b32 %r240, %r14, 3;
	setp.lt.u32 %p5, %r80, 3;
	mov.u32 %r238, 0;
	@%p5 bra $L__BB0_7;

	sub.s32 %r237, %r14, %r240;             // bytes handled by the unrolled loop
	mov.u32 %r238, 0;

	// Unrolled copy: 4 bytes per iteration, global -> shared_mem[1280 + r8 + i].
$L__BB0_6:
	add.s32 %r82, %r238, %r13;
	cvt.s64.s32 %rd30, %r82;
	add.s64 %rd31, %rd1, %rd30;
	ld.global.nc.u8 %rs20, [%rd31];
	add.s32 %r83, %r8, %r238;
	add.s32 %r85, %r68, %r83;
	st.volatile.shared.u8 [%r85+1280], %rs20;
	ld.global.nc.u8 %rs21, [%rd31+1];
	st.volatile.shared.u8 [%r85+1281], %rs21;
	ld.global.nc.u8 %rs22, [%rd31+2];
	st.volatile.shared.u8 [%r85+1282], %rs22;
	ld.global.nc.u8 %rs23, [%rd31+3];
	st.volatile.shared.u8 [%r85+1283], %rs23;
	add.s32 %r238, %r238, 4;
	add.s32 %r237, %r237, -4;
	setp.ne.s32 %p6, %r237, 0;
	@%p6 bra $L__BB0_6;

$L__BB0_7:
	setp.eq.s32 %p7, %r240, 0;
	@%p7 bra $L__BB0_10;

	add.s32 %r86, %r238, %r8;
	add.s32 %r88, %r68, %r86;
	add.s32 %r239, %r88, 1280;
	add.s32 %r89, %r238, %r13;
	cvt.s64.s32 %rd32, %r89;
	add.s64 %rd97, %rd1, %rd32;

	// Remainder copy: the last n & 3 bytes, one per iteration.
$L__BB0_9:
	.pragma "nounroll";
	ld.global.nc.u8 %rs24, [%rd97];
	st.volatile.shared.u8 [%r239], %rs24;
	add.s32 %r239, %r239, 1;
	add.s64 %rd97, %rd97, 1;
	add.s32 %r240, %r240, -1;
	setp.ne.s32 %p8, %r240, 0;
	@%p8 bra $L__BB0_9;

	// ---- Phase 3: set up the bit window ----------------------------------
$L__BB0_10:
	add.s32 %r27, %r3, 1284;                // r27 = address of slot[0]
	bar.sync 0;                             // tables and staged input are complete

	// Fetch the 5-bit field at bit offset 5*g from param_3: signed divide by 8
	// gives the byte index, two bytes are combined big-endian, then shifted
	// right by 11 - (5*g mod 8) and masked to 5 bits.
	mul.lo.s32 %r93, %r7, 5;
	shr.s32 %r94, %r93, 31;
	shr.u32 %r95, %r94, 29;
	add.s32 %r96, %r93, %r95;
	shr.s32 %r97, %r96, 3;
	cvt.s64.s32 %rd33, %r97;
	cvta.to.global.u64 %rd34, %rd24;
	add.s64 %rd35, %rd34, %rd33;
	ld.global.nc.u8 %rs25, [%rd35+1];
	cvt.u32.u16 %r98, %rs25;
	and.b32 %r99, %r98, 255;
	ld.global.nc.u8 %rs26, [%rd35];
	cvt.u32.u16 %r100, %rs26;
	prmt.b32 %r249, %r100, %r99, 30212;     // selector 0x7604: (byte[0] << 8) | byte[1]
	mov.u32 %r241, 0;                       // r241 = symbols counted so far
	and.b32 %r101, %r96, -8;
	sub.s32 %r102, %r101, %r93;
	add.s32 %r103, %r102, 11;
	shr.u32 %r104, %r249, %r103;
	cvt.u64.u32 %rd36, %r104;
	cvt.u16.u32 %rs27, %r104;
	and.b16 %rs45, %rs27, 31;               // rs45 = initial bit offset (0..31)

	// Load the first 8 staged bytes of the slice as a big-endian 64-bit window.
	add.s32 %r29, %r68, %r8;
	ld.volatile.shared.u8 %rd37, [%r29+1280];
	shl.b64 %rd38, %rd37, 56;
	ld.volatile.shared.u8 %rd39, [%r29+1281];
	shl.b64 %rd40, %rd39, 48;
	or.b64 %rd41, %rd40, %rd38;
	ld.volatile.shared.u8 %rd42, [%r29+1282];
	shl.b64 %rd43, %rd42, 40;
	or.b64 %rd44, %rd41, %rd43;
	ld.volatile.shared.u8 %rd45, [%r29+1283];
	shl.b64 %rd46, %rd45, 32;
	or.b64 %rd47, %rd44, %rd46;
	ld.volatile.shared.u8 %r105, [%r29+1284];
	mul.wide.u32 %rd48, %r105, 16777216;
	or.b64 %rd49, %rd47, %rd48;
	ld.volatile.shared.u8 %r106, [%r29+1285];
	mul.wide.u32 %rd50, %r106, 65536;
	ld.volatile.shared.u8 %r107, [%r29+1286];
	mul.wide.u32 %rd51, %r107, 256;
	or.b64 %rd52, %rd49, %rd50;
	ld.volatile.shared.u8 %rd53, [%r29+1287];
	or.b64 %rd54, %rd52, %rd51;
	or.b64 %rd55, %rd54, %rd53;
	add.s32 %r251, %r8, 8;                  // r251 = next unread staged byte (pass 2 cursor)
	and.b64 %rd5, %rd36, 31;                // rd5 = initial bit offset, kept for pass 2
	and.b32 %r108, %r104, 31;
	shl.b64 %rd98, %rd55, %r108;            // rd98 = pass-1 window, initial offset skipped
	mov.u32 %r109, -8;
	sub.s32 %r31, %r109, %r8;               // r31 = -8 - r8, used for the slice-relative cursor
	cvta.to.global.u64 %rd7, %rd25;
	cvta.to.global.u64 %rd8, %rd22;
	cvta.to.global.u64 %rd9, %rd23;
	add.s32 %r32, %r68, %r4;                // r32 = shared_mem + B + 4 * ntid.x
	mov.u16 %rs43, %rs45;                   // rs43 = bits consumed (pass 1)
	mov.u32 %r243, %r251;                   // r243 = next unread staged byte (pass 1)
	bra.uni $L__BB0_11;

	// ---- Pass 1: count the symbols that start inside this slice ----------
	// Refill: at least 32 bits are consumed, so read 4 more staged bytes
	// big-endian and OR them into the window shifted left by (consumed - 32).
$L__BB0_47:
	add.s32 %r224, %r68, %r243;
	ld.volatile.shared.u8 %r225, [%r224+1280];
	shl.b32 %r226, %r225, 24;
	ld.volatile.shared.u8 %r227, [%r224+1281];
	shl.b32 %r228, %r227, 16;
	or.b32 %r229, %r228, %r226;
	ld.volatile.shared.u8 %rs42, [%r224+1282];
	mul.wide.u16 %r230, %rs42, 256;
	or.b32 %r231, %r229, %r230;
	ld.volatile.shared.u8 %r232, [%r224+1283];
	or.b32 %r249, %r231, %r232;
	add.s32 %r243, %r243, 4;
	cvt.u64.u32 %rd92, %r249;
	cvt.u64.u16 %rd93, %rs43;
	and.b64 %rd94, %rd93, 255;
	add.s64 %rd95, %rd94, 4294967264;       // + 0xFFFFFFE0: low 32 bits = consumed - 32
	cvt.u32.u64 %r233, %rd95;
	shl.b64 %rd96, %rd92, %r233;
	or.b64 %rd98, %rd96, %rd98;
	add.s16 %rs43, %rs43, -32;
	mov.u32 %r241, %r36;

	// Symbol lookup: try the four 256-entry tables with successive window
	// bytes; the first non-zero entry is the symbol (4th level is unchecked).
$L__BB0_11:
	shr.u64 %rd56, %rd98, 56;
	cvt.u32.u64 %r110, %rd56;
	add.s32 %r112, %r68, %r110;
	ld.volatile.shared.u8 %rs44, [%r112];
	setp.ne.s16 %p9, %rs44, 0;
	@%p9 bra $L__BB0_15;

	shr.u64 %rd57, %rd98, 48;
	cvt.u32.u64 %r113, %rd57;
	and.b32 %r114, %r113, 255;
	add.s32 %r116, %r68, %r114;
	ld.volatile.shared.u8 %rs44, [%r116+256];
	setp.ne.s16 %p10, %rs44, 0;
	@%p10 bra $L__BB0_15;

	shr.u64 %rd58, %rd98, 40;
	cvt.u32.u64 %r117, %rd58;
	and.b32 %r118, %r117, 255;
	add.s32 %r120, %r68, %r118;
	ld.volatile.shared.u8 %rs44, [%r120+512];
	setp.ne.s16 %p11, %rs44, 0;
	@%p11 bra $L__BB0_15;

	shr.u64 %rd59, %rd98, 32;
	cvt.u32.u64 %r121, %rd59;
	and.b32 %r122, %r121, 255;
	add.s32 %r124, %r68, %r122;
	ld.volatile.shared.u8 %rs44, [%r124+768];

$L__BB0_15:
	add.s32 %r36, %r241, 1;                 // r36 = count including this symbol
	cvt.u32.u16 %r125, %rs44;
	and.b32 %r126, %r125, 255;
	add.s32 %r128, %r68, %r126;
	ld.volatile.shared.u8 %rs28, [%r128+1024];   // shift amount for this symbol
	cvt.u32.u16 %r129, %rs28;
	and.b32 %r130, %r129, 255;
	shl.b64 %rd98, %rd98, %r130;
	add.s16 %rs43, %rs28, %rs43;
	// r133 = (r243 - 8 - r8) + (rs43 >> 3): byte index, relative to the
	// slice start, of the next undecoded bit.  Keep going while it is
	// still below param_5.
	and.b16 %rs29, %rs43, 248;
	shr.u16 %rs30, %rs29, 3;
	cvt.u32.u16 %r131, %rs30;
	add.s32 %r132, %r31, %r243;
	add.s32 %r133, %r132, %r131;
	setp.lt.u32 %p12, %r133, %r65;
	@%p12 bra $L__BB0_46;
	bra.uni $L__BB0_16;

$L__BB0_46:
	and.b16 %rs41, %rs43, 255;
	setp.lt.u16 %p32, %rs41, 32;            // fewer than 32 bits consumed: no refill
	mov.u32 %r241, %r36;
	@%p32 bra $L__BB0_11;
	bra.uni $L__BB0_47;

	// ---- Phase 4: publish counts and run the block-wide prefix sum -------
$L__BB0_16:
	setp.eq.s32 %p13, %r252, 0;
	shl.b32 %r134, %r252, 2;
	add.s32 %r37, %r3, %r134;               // [r37+1284] = slot[tid.x]
	add.s32 %r38, %r3, %r69;                // [r38+1284] = slot[ntid.x], [r38+1280] = slot[ntid.x-1]
	@%p13 bra $L__BB0_18;

	add.s32 %r234, %r241, 1;
	st.volatile.shared.u32 [%r37+1284], %r234;   // slot[tid.x] = symbol count
	bra.uni $L__BB0_19;

	// Thread 0: slot[ntid.x] = param_2[ctaid.x] (block base);
	// slot[0] = base + own count, which folds the base into the scan.
$L__BB0_18:
	mul.wide.u32 %rd60, %r5, 4;
	add.s64 %rd61, %rd9, %rd60;
	ld.global.nc.u32 %r136, [%rd61];
	st.volatile.shared.u32 [%r38+1284], %r136;
	ld.volatile.shared.u32 %r137, [%r38+1284];
	add.s32 %r138, %r137, %r36;
	st.volatile.shared.u32 [%r3+1284], %r138;

$L__BB0_19:
	bar.sync 0;
	setp.lt.u32 %p14, %r1, 2;
	@%p14 bra $L__BB0_24;

	add.s32 %r39, %r252, 1;
	mov.u32 %r244, 2;

	// Up-sweep: for stride d = 2, 4, ... <= ntid.x, threads with
	// (tid.x + 1) % d == 0 do slot[tid.x] += slot[tid.x - d/2].
$L__BB0_21:
	rem.u32 %r140, %r39, %r244;
	setp.ne.s32 %p15, %r140, 0;
	@%p15 bra $L__BB0_23;

	shr.u32 %r141, %r244, 1;
	sub.s32 %r142, %r252, %r141;
	shl.b32 %r143, %r142, 2;
	add.s32 %r144, %r27, %r143;
	ld.volatile.shared.u32 %r145, [%r37+1284];
	ld.volatile.shared.u32 %r146, [%r144];
	add.s32 %r147, %r145, %r146;
	st.volatile.shared.u32 [%r37+1284], %r147;

$L__BB0_23:
	bar.sync 0;
	shl.b32 %r244, %r244, 1;
	setp.le.u32 %p16, %r244, %r1;
	@%p16 bra $L__BB0_21;

$L__BB0_24:
	setp.ne.s32 %p17, %r252, 0;
	@%p17 bra $L__BB0_26;

	mov.u32 %r148, 0;
	st.volatile.shared.u32 [%r38+1280], %r148;   // thread 0 clears slot[ntid.x - 1]

$L__BB0_26:
	bar.sync 0;
	setp.lt.s32 %p18, %r1, 2;
	@%p18 bra $L__BB0_31;

	add.s32 %r42, %r252, 1;
	mov.u32 %r245, %r1;

	// Down-sweep: for stride d = ntid.x, ntid.x/2, ... , 2, threads with
	// (tid.x + 1) % d == 0 set slot[tid.x] to the sum of both slots and
	// slot[tid.x - d/2] to the previous slot[tid.x].
$L__BB0_28:
	rem.u32 %r149, %r42, %r245;
	setp.eq.s32 %p19, %r149, 0;
	@%p19 bra $L__BB0_29;
	bra.uni $L__BB0_30;

$L__BB0_29:
	shr.u32 %r150, %r245, 1;
	sub.s32 %r151, %r252, %r150;
	shl.b32 %r152, %r151, 2;
	add.s32 %r153, %r27, %r152;
	ld.volatile.shared.u32 %r154, [%r37+1284];
	ld.volatile.shared.u32 %r155, [%r153];
	add.s32 %r156, %r154, %r155;
	st.volatile.shared.u32 [%r37+1284], %r156;
	ld.volatile.shared.u32 %r157, [%r153];
	ld.volatile.shared.u32 %r158, [%r37+1284];
	sub.s32 %r159, %r158, %r157;            // sum - partner = previous slot[tid.x]
	st.volatile.shared.u32 [%r153], %r159;

$L__BB0_30:
	shr.u32 %r44, %r245, 1;
	bar.sync 0;
	setp.gt.u32 %p20, %r245, 3;             // loop while the stride just used was >= 4
	mov.u32 %r245, %r44;
	@%p20 bra $L__BB0_28;

$L__BB0_31:
	@%p3 bra $L__BB0_32;
	bra.uni $L__BB0_33;

	// Last thread of the block: slot[0] = block base (taken from slot[ntid.x]),
	// then slot[ntid.x] = own start + own count = end position of the block.
$L__BB0_32:
	ld.volatile.shared.u32 %r160, [%r38+1284];
	st.volatile.shared.u32 [%r3+1284], %r160;
	ld.volatile.shared.u32 %r161, [%r37+1284];
	add.s32 %r162, %r161, %r36;
	st.volatile.shared.u32 [%r38+1284], %r162;

	// ---- Pass 2: decode again, this time emitting outputs ----------------
$L__BB0_33:
	bar.sync 0;
	ld.volatile.shared.u32 %r163, [%r37+1284];   // r163 = this thread's first output position
	cvt.u64.u32 %rd12, %r163;
	ld.volatile.shared.u32 %r45, [%r3+1284];     // r45 = block base (slot[0])
	add.s32 %r46, %r163, %r36;                   // r46 = one past this thread's last output
	// Rebuild the 64-bit big-endian window from the start of the slice.
	ld.volatile.shared.u8 %rd62, [%r29+1280];
	shl.b64 %rd63, %rd62, 56;
	ld.volatile.shared.u8 %rd64, [%r29+1281];
	shl.b64 %rd65, %rd64, 48;
	or.b64 %rd66, %rd65, %rd63;
	ld.volatile.shared.u8 %rd67, [%r29+1282];
	shl.b64 %rd68, %rd67, 40;
	or.b64 %rd69, %rd66, %rd68;
	ld.volatile.shared.u8 %rd70, [%r29+1283];
	shl.b64 %rd71, %rd70, 32;
	or.b64 %rd72, %rd69, %rd71;
	ld.volatile.shared.u8 %r164, [%r29+1284];
	mul.wide.u32 %rd73, %r164, 16777216;
	ld.volatile.shared.u8 %r165, [%r29+1285];
	mul.wide.u32 %rd74, %r165, 65536;
	ld.volatile.shared.u8 %r166, [%r29+1286];
	mul.wide.u32 %rd75, %r166, 256;
	ld.volatile.shared.u8 %rd76, [%r29+1287];
	or.b64 %rd77, %rd72, %rd73;
	or.b64 %rd78, %rd77, %rd74;
	or.b64 %rd79, %rd78, %rd75;
	or.b64 %rd80, %rd79, %rd76;
	cvt.u32.u64 %r167, %rd5;
	shl.b64 %rd100, %rd80, %r167;           // rd100 = pass-2 window, initial offset skipped
	setp.ge.u32 %p22, %r163, %r67;          // start position beyond param_7: nothing to emit
	@%p22 bra $L__BB0_43;

	cvt.u32.u64 %r168, %rd12;
	add.s64 %rd99, %rd8, %rd12;             // rd99 = &param_1[position]
	// r247 = shared_mem + B + 4*ntid.x + 1288 + 2*(position - base):
	// staging slot for this output.
	shl.b32 %r169, %r168, 1;
	add.s32 %r170, %r4, %r169;
	add.s32 %r171, %r170, 1288;
	shl.b32 %r172, %r45, 1;
	sub.s32 %r173, %r171, %r172;
	add.s32 %r247, %r68, %r173;
	add.s32 %r246, %r168, 1;                // r246 = position of the next output
	bra.uni $L__BB0_35;

	// Advance to the next output element.
$L__BB0_42:
	add.s64 %rd99, %rd99, 1;
	add.s32 %r247, %r247, 2;
	add.s32 %r246, %r246, 1;

	// Symbol lookup, same four-level scheme as pass 1.
$L__BB0_35:
	shr.u64 %rd81, %rd100, 56;
	cvt.u32.u64 %r175, %rd81;
	add.s32 %r177, %r68, %r175;
	ld.volatile.shared.u8 %rs46, [%r177];
	setp.ne.s16 %p23, %rs46, 0;
	@%p23 bra $L__BB0_39;

	shr.u64 %rd82, %rd100, 48;
	cvt.u32.u64 %r178, %rd82;
	and.b32 %r179, %r178, 255;
	add.s32 %r181, %r68, %r179;
	ld.volatile.shared.u8 %rs46, [%r181+256];
	setp.ne.s16 %p24, %rs46, 0;
	@%p24 bra $L__BB0_39;

	shr.u64 %rd83, %rd100, 40;
	cvt.u32.u64 %r182, %rd83;
	and.b32 %r183, %r182, 255;
	add.s32 %r185, %r68, %r183;
	ld.volatile.shared.u8 %rs46, [%r185+512];
	setp.ne.s16 %p25, %rs46, 0;
	@%p25 bra $L__BB0_39;

	shr.u64 %rd84, %rd100, 32;
	cvt.u32.u64 %r186, %rd84;
	and.b32 %r187, %r186, 255;
	add.s32 %r189, %r68, %r187;
	ld.volatile.shared.u8 %rs46, [%r189+768];

	// Assemble the 16-bit output from the symbol s (rs46) and the byte
	// b = param_1[position] (rs31):
	//   bit 15 = b bit 7, bits 14..7 = s, bits 6..0 = b bits 6..0.
	// NOTE(review): this 1/8/7 split matches a bfloat16 sign/exponent/mantissa
	// layout -- presumably that is the output type; confirm with the caller.
$L__BB0_39:
	ld.global.nc.u8 %rs31, [%rd99];
	and.b16 %rs32, %rs31, 128;
	and.b16 %rs33, %rs46, 254;
	shr.u16 %rs34, %rs33, 1;
	or.b16 %rs35, %rs32, %rs34;
	mul.wide.u16 %r190, %rs35, 256;         // high byte: (b & 0x80) | (s >> 1)
	and.b16 %rs36, %rs31, 127;
	cvt.u32.u16 %r191, %rs46;
	cvt.u32.u16 %r192, %rs36;
	bfi.b32 %r193, %r191, %r192, 7, 9;      // insert s at bit 7 above (b & 0x7f)
	and.b32 %r194, %r193, 255;              // low byte: ((s & 1) << 7) | (b & 0x7f)
	and.b32 %r195, %r249, -65536;           // stale upper half of r249; not stored below
	or.b32 %r196, %r195, %r194;
	or.b32 %r249, %r196, %r190;
	st.volatile.shared.u16 [%r247], %r249;  // stores the low 16 bits only
	// Stop once the next position reaches param_7 or this thread's end.
	setp.ge.u32 %p26, %r246, %r67;
	setp.ge.u32 %p27, %r246, %r46;
	or.pred %p28, %p26, %p27;
	@%p28 bra $L__BB0_43;

	// Consume the symbol's bits; refill 32 bits when at least 32 are consumed.
	and.b32 %r198, %r191, 255;
	add.s32 %r200, %r68, %r198;
	ld.volatile.shared.u8 %rs37, [%r200+1024];
	cvt.u32.u16 %r201, %rs37;
	and.b32 %r202, %r201, 255;
	shl.b64 %rd100, %rd100, %r202;
	add.s16 %rs45, %rs37, %rs45;
	and.b16 %rs38, %rs45, 255;
	setp.lt.u16 %p29, %rs38, 32;
	@%p29 bra $L__BB0_42;

	add.s32 %r204, %r68, %r251;
	ld.volatile.shared.u8 %r205, [%r204+1280];
	shl.b32 %r206, %r205, 24;
	ld.volatile.shared.u8 %r207, [%r204+1281];
	shl.b32 %r208, %r207, 16;
	or.b32 %r209, %r208, %r206;
	ld.volatile.shared.u8 %rs39, [%r204+1282];
	mul.wide.u16 %r210, %rs39, 256;
	or.b32 %r211, %r209, %r210;
	ld.volatile.shared.u8 %r212, [%r204+1283];
	or.b32 %r249, %r211, %r212;
	add.s32 %r251, %r251, 4;
	cvt.u64.u32 %rd85, %r249;
	cvt.u64.u16 %rd86, %rs45;
	and.b64 %rd87, %rd86, 255;
	add.s64 %rd88, %rd87, 4294967264;       // + 0xFFFFFFE0: low 32 bits = consumed - 32
	cvt.u32.u64 %r213, %rd88;
	shl.b64 %rd89, %rd85, %r213;
	or.b64 %rd100, %rd89, %rd100;
	add.s16 %rs45, %rs45, -32;
	bra.uni $L__BB0_42;

	// ---- Phase 5: copy staged outputs to global memory -------------------
	// For i = tid.x, tid.x + ntid.x, ... below
	// min(slot[ntid.x] - base, param_7 - base): param_4[base + i] = staged[i].
$L__BB0_43:
	bar.sync 0;
	ld.volatile.shared.u32 %r214, [%r38+1284];
	sub.s32 %r215, %r214, %r45;
	sub.s32 %r60, %r67, %r45;
	min.u32 %r216, %r215, %r60;
	setp.ge.u32 %p30, %r252, %r216;
	@%p30 bra $L__BB0_45;

$L__BB0_44:
	shl.b32 %r217, %r252, 1;
	add.s32 %r218, %r32, %r217;
	ld.volatile.shared.u16 %rs40, [%r218+1288];
	add.s32 %r219, %r252, %r45;
	mul.wide.u32 %rd90, %r219, 2;
	add.s64 %rd91, %rd7, %rd90;
	st.global.u16 [%rd91], %rs40;
	// The bound is re-read every iteration because the load is volatile.
	ld.volatile.shared.u32 %r220, [%r38+1284];
	sub.s32 %r221, %r220, %r45;
	min.u32 %r222, %r221, %r60;
	add.s32 %r252, %r252, %r1;
	setp.lt.u32 %p31, %r252, %r222;
	@%p31 bra $L__BB0_44;

$L__BB0_45:
	ret;

}