| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
/*
 * function name, align=DEFAULT_ALIGN
 *
 * Opens a global function symbol: switches to .text, applies .align with the
 * given operand, exports ASM_PREF<name>, types it as @function and emits its
 * label.
 *
 * It also defines a one-shot `endfunc` macro that
 *   - emits the return (jirl with $r0 as link register and $r1 as target),
 *   - records the symbol size as the distance from the label to that point,
 *   - purges itself, so the next `function` can define `endfunc` again.
 *
 * name  : function name; ASM_PREF is prepended to it.
 * align : operand handed straight to .align.
 *
 * ASM_PREF and DEFAULT_ALIGN are not defined in the visible part of the file.
 */
| .macro function name, align=DEFAULT_ALIGN |
| .macro endfunc |
| jirl $r0, $r1, 0x0 |
| .size ASM_PREF\name, . - ASM_PREF\name |
| .purgem endfunc |
| .endm |
| .text ; |
| .align \align ; |
| .globl ASM_PREF\name ; |
| .type ASM_PREF\name, @function ; |
| ASM_PREF\name: ; |
| .endm |
|
|
| |
| |
| |
| |
/*
 * alloc_stack size, align=0
 *
 * Reserves `size` bytes of stack and defines a matching `clean_stack` macro
 * that releases them again.
 *
 * align == 0 : sp is lowered by `size`; clean_stack adds `size` back.
 * align != 0 : sp is lowered by `size` and then rounded down to a multiple of
 *              `align`. The mask is align - 1, so `align` has to be a power
 *              of two, and align - 1 has to fit the 12-bit unsigned immediate
 *              of `andi`. The total adjustment is kept in t7, so t7 must be
 *              left untouched until clean_stack has run.
 *
 * The mask is applied with `andi`: bitwise AND-immediate has no .d-suffixed
 * form in the LoongArch base ISA.
 *
 * NOTE(review): clean_stack is never purged, so a second alloc_stack in the
 * same translation unit presumably triggers a macro redefinition error -
 * confirm before relying on multiple uses per file.
 */
| .macro alloc_stack size, align=0 |
| .if \align |
| .macro clean_stack |
| add.d sp, sp, t7 |
| .endm |
| addi.d sp, sp, - \size |
| andi t7, sp, \align - 1 |
| sub.d sp, sp, t7 |
| addi.d t7, t7, \size |
| .else |
| .macro clean_stack |
| addi.d sp, sp, \size |
| .endm |
| addi.d sp, sp, - \size |
| .endif |
| .endm |
|
|
/*
 * const name, align=DEFAULT_ALIGN
 *
 * Opens a read-only data object: switches to .rodata, applies .align with the
 * given operand and emits the label <name> (no ASM_PREF prefix, no .globl).
 *
 * It also defines a one-shot `endconst` macro that records the object size
 * as the distance from the label and then purges itself so the next `const`
 * can define it again.
 *
 * The current section is left at .rodata afterwards.
 */
| .macro const name, align=DEFAULT_ALIGN |
| .macro endconst |
| .size \name, . - \name |
| .purgem endconst |
| .endm |
| .section .rodata |
| .align \align |
| \name: |
| .endm |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
/*
 * vdp2.h.bu vd, vj, vk   (LSX)
 *
 * Pairwise dot product of unsigned bytes with halfword results: the products
 * of the even-indexed elements initialise vd, then the products of the
 * odd-indexed elements are accumulated on top.
 *
 * vd is overwritten by the first instruction while vj/vk are read again by
 * the second, so vd must be a different register from vj and vk.
 */
| .macro vdp2.h.bu vd, vj, vk |
| vmulwev.h.bu \vd, \vj, \vk |
| vmaddwod.h.bu \vd, \vj, \vk |
| .endm |
|
|
/*
 * vdp2.h.bu.b vd, vj, vk   (LSX)
 *
 * Pairwise dot product with halfword results, using the mixed-sign
 * (.bu.b: unsigned bytes in vj, signed bytes in vk) widening multiplies:
 * even-element products initialise vd, odd-element products are accumulated.
 *
 * vd must be a different register from vj and vk (it is written before the
 * second instruction reads the sources again).
 */
| .macro vdp2.h.bu.b vd, vj, vk |
| vmulwev.h.bu.b \vd, \vj, \vk |
| vmaddwod.h.bu.b \vd, \vj, \vk |
| .endm |
|
|
/*
 * vdp2.w.h vd, vj, vk   (LSX)
 *
 * Pairwise dot product of signed halfwords with word results: even-element
 * products initialise vd, odd-element products are accumulated on top.
 *
 * vd must be a different register from vj and vk (it is written before the
 * second instruction reads the sources again).
 */
| .macro vdp2.w.h vd, vj, vk |
| vmulwev.w.h \vd, \vj, \vk |
| vmaddwod.w.h \vd, \vj, \vk |
| .endm |
|
|
/*
 * xvdp2.h.bu xd, xj, xk   (LASX)
 *
 * 256-bit counterpart of vdp2.h.bu: pairwise dot product of unsigned bytes
 * with halfword results (even products initialise xd, odd products are
 * accumulated).
 *
 * xd must be a different register from xj and xk.
 */
| .macro xvdp2.h.bu xd, xj, xk |
| xvmulwev.h.bu \xd, \xj, \xk |
| xvmaddwod.h.bu \xd, \xj, \xk |
| .endm |
|
|
/*
 * xvdp2.h.bu.b xd, xj, xk   (LASX)
 *
 * 256-bit pairwise dot product with halfword results using the mixed-sign
 * (.bu.b: unsigned bytes in xj, signed bytes in xk) widening multiplies.
 * Even products initialise xd, odd products are accumulated.
 *
 * xd must be a different register from xj and xk.
 */
| .macro xvdp2.h.bu.b xd, xj, xk |
| xvmulwev.h.bu.b \xd, \xj, \xk |
| xvmaddwod.h.bu.b \xd, \xj, \xk |
| .endm |
|
|
/*
 * xvdp2.w.h xd, xj, xk   (LASX)
 *
 * 256-bit pairwise dot product of signed halfwords with word results
 * (even products initialise xd, odd products are accumulated).
 *
 * xd must be a different register from xj and xk.
 */
| .macro xvdp2.w.h xd, xj, xk |
| xvmulwev.w.h \xd, \xj, \xk |
| xvmaddwod.w.h \xd, \xj, \xk |
| .endm |
|
|
| |
| |
| |
| |
| |
| |
/*
 * vdp2add.h.bu vd, vj, vk   (LSX)
 *
 * Accumulating pairwise dot product of unsigned bytes: both the even-element
 * and the odd-element widened products are added to the halfwords already
 * held in vd, so vd is an input as well as the output.
 *
 * vd must be a different register from vj and vk (it is updated before the
 * second instruction reads the sources again).
 */
| .macro vdp2add.h.bu vd, vj, vk |
| vmaddwev.h.bu \vd, \vj, \vk |
| vmaddwod.h.bu \vd, \vj, \vk |
| .endm |
|
|
/*
 * vdp2add.h.bu.b vd, vj, vk   (LSX)
 *
 * Accumulating pairwise dot product using the mixed-sign (.bu.b: unsigned
 * bytes in vj, signed bytes in vk) widening multiply-adds. Even and odd
 * products are both added to the halfwords already in vd.
 *
 * vd must be a different register from vj and vk.
 */
| .macro vdp2add.h.bu.b vd, vj, vk |
| vmaddwev.h.bu.b \vd, \vj, \vk |
| vmaddwod.h.bu.b \vd, \vj, \vk |
| .endm |
|
|
/*
 * vdp2add.w.h vd, vj, vk   (LSX)
 *
 * Accumulating pairwise dot product of signed halfwords: even and odd widened
 * products are both added to the words already held in vd.
 *
 * vd must be a different register from vj and vk.
 */
| .macro vdp2add.w.h vd, vj, vk |
| vmaddwev.w.h \vd, \vj, \vk |
| vmaddwod.w.h \vd, \vj, \vk |
| .endm |
|
|
/*
 * xvdp2add.h.bu.b xd, xj, xk   (LASX)
 *
 * 256-bit accumulating pairwise dot product using the mixed-sign (.bu.b:
 * unsigned bytes in xj, signed bytes in xk) widening multiply-adds. Even and
 * odd products are both added to the halfwords already in xd.
 *
 * xd must be a different register from xj and xk.
 */
| .macro xvdp2add.h.bu.b xd, xj, xk |
| xvmaddwev.h.bu.b \xd, \xj, \xk |
| xvmaddwod.h.bu.b \xd, \xj, \xk |
| .endm |
|
|
/*
 * xvdp2add.w.h xd, xj, xk   (LASX)
 *
 * 256-bit accumulating pairwise dot product of signed halfwords: even and odd
 * widened products are both added to the words already held in xd.
 *
 * xd must be a different register from xj and xk.
 */
| .macro xvdp2add.w.h xd, xj, xk |
| xvmaddwev.w.h \xd, \xj, \xk |
| xvmaddwod.w.h \xd, \xj, \xk |
| .endm |
|
|
| |
| |
| |
| |
| |
/*
 * vclip.h vd, vj, vk, va   (LSX)
 *
 * Clamps each signed halfword of vj between two per-element bounds:
 *   vd = min(max(vj, vk), va)
 * vk supplies the lower bounds, va the upper bounds.
 *
 * vd is written before va is read, so vd must not be the same register as va.
 */
| .macro vclip.h vd, vj, vk, va |
| vmax.h \vd, \vj, \vk |
| vmin.h \vd, \vd, \va |
| .endm |
|
|
/*
 * vclip255.w vd, vj   (LSX)
 *
 * Clamps each signed word of vj to the range 0..255: negatives are raised to
 * zero first, then the value is saturated as an unsigned 8-bit quantity
 * (vsat immediate 7). vd may be the same register as vj.
 */
| .macro vclip255.w vd, vj |
| vmaxi.w \vd, \vj, 0 |
| vsat.wu \vd, \vd, 7 |
| .endm |
|
|
/*
 * vclip255.h vd, vj   (LSX)
 *
 * Clamps each signed halfword of vj to the range 0..255: negatives are raised
 * to zero first, then the value is saturated as an unsigned 8-bit quantity
 * (vsat immediate 7). vd may be the same register as vj.
 */
| .macro vclip255.h vd, vj |
| vmaxi.h \vd, \vj, 0 |
| vsat.hu \vd, \vd, 7 |
| .endm |
|
|
/*
 * xvclip.h xd, xj, xk, xa   (LASX)
 *
 * Clamps each signed halfword of xj between two per-element bounds:
 *   xd = min(max(xj, xk), xa)
 * xk supplies the lower bounds, xa the upper bounds.
 *
 * xd is written before xa is read, so xd must not be the same register as xa.
 */
| .macro xvclip.h xd, xj, xk, xa |
| xvmax.h \xd, \xj, \xk |
| xvmin.h \xd, \xd, \xa |
| .endm |
|
|
/*
 * xvclip255.h xd, xj   (LASX)
 *
 * Clamps each signed halfword of xj to the range 0..255: negatives are raised
 * to zero first, then the value is saturated as an unsigned 8-bit quantity
 * (xvsat immediate 7). xd may be the same register as xj.
 */
| .macro xvclip255.h xd, xj |
| xvmaxi.h \xd, \xj, 0 |
| xvsat.hu \xd, \xd, 7 |
| .endm |
|
|
/*
 * xvclip255.w xd, xj   (LASX)
 *
 * Clamps each signed word of xj to the range 0..255: negatives are raised to
 * zero first, then the value is saturated as an unsigned 8-bit quantity
 * (xvsat immediate 7). xd may be the same register as xj.
 */
| .macro xvclip255.w xd, xj |
| xvmaxi.w \xd, \xj, 0 |
| xvsat.wu \xd, \xd, 7 |
| .endm |
|
|
| |
| |
| |
| |
| |
| |
| |
/*
 * vstelmx.b vd, rk, ra, si   (LSX)
 *
 * Advances the address register and stores one byte element of vd there.
 *   vd : vector holding the data
 *   rk : address register; it is UPDATED to rk + ra before the store
 *   ra : register holding the byte offset added to rk
 *   si : index of the element of vd to store
 * The store itself uses a zero displacement from the updated rk.
 */
| .macro vstelmx.b vd, rk, ra, si |
| add.d \rk, \rk, \ra |
| vstelm.b \vd, \rk, 0, \si |
| .endm |
|
|
/*
 * vstelmx.h vd, rk, ra, si   (LSX)
 *
 * Advances the address register and stores one halfword element of vd there.
 *   vd : vector holding the data
 *   rk : address register; it is UPDATED to rk + ra before the store
 *   ra : register holding the byte offset added to rk
 *   si : index of the element of vd to store
 * The store itself uses a zero displacement from the updated rk.
 */
| .macro vstelmx.h vd, rk, ra, si |
| add.d \rk, \rk, \ra |
| vstelm.h \vd, \rk, 0, \si |
| .endm |
|
|
/*
 * vstelmx.w vd, rk, ra, si   (LSX)
 *
 * Advances the address register and stores one word element of vd there.
 *   vd : vector holding the data
 *   rk : address register; it is UPDATED to rk + ra before the store
 *   ra : register holding the byte offset added to rk
 *   si : index of the element of vd to store
 * The store itself uses a zero displacement from the updated rk.
 */
| .macro vstelmx.w vd, rk, ra, si |
| add.d \rk, \rk, \ra |
| vstelm.w \vd, \rk, 0, \si |
| .endm |
|
|
/*
 * vstelmx.d vd, rk, ra, si   (LSX)
 *
 * Advances the address register and stores one doubleword element of vd there.
 *   vd : vector holding the data
 *   rk : address register; it is UPDATED to rk + ra before the store
 *   ra : register holding the byte offset added to rk
 *   si : index of the element of vd to store
 * The store itself uses a zero displacement from the updated rk.
 */
| .macro vstelmx.d vd, rk, ra, si |
| add.d \rk, \rk, \ra |
| vstelm.d \vd, \rk, 0, \si |
| .endm |
|
|
/*
 * vmov xd, xj   (LSX)
 *
 * Register-to-register copy of a 128-bit vector, implemented as xj OR xj.
 * Despite the x-prefixed parameter names the instruction used is the LSX
 * vor.v.
 */
| .macro vmov xd, xj |
| vor.v \xd, \xj, \xj |
| .endm |
|
|
/*
 * xmov xd, xj   (LASX)
 *
 * Register-to-register copy of a 256-bit vector, implemented as xj OR xj.
 */
| .macro xmov xd, xj |
| xvor.v \xd, \xj, \xj |
| .endm |
|
|
/*
 * xvstelmx.d xd, rk, ra, si   (LASX)
 *
 * Advances the address register and stores one doubleword element of xd there.
 *   xd : vector holding the data
 *   rk : address register; it is UPDATED to rk + ra before the store
 *   ra : register holding the byte offset added to rk
 *   si : index of the element of xd to store
 * The store itself uses a zero displacement from the updated rk.
 */
| .macro xvstelmx.d xd, rk, ra, si |
| add.d \rk, \rk, \ra |
| xvstelm.d \xd, \rk, 0, \si |
| .endm |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
/*
 * FLDS_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
 *
 * Loads four single-precision FP registers from src, src + stride,
 * src + stride2 and src + stride3.
 *   src                      : base address register (not modified)
 *   stride, stride2, stride3 : registers holding byte offsets from src;
 *                              the macro does not derive one from another
 *   out0..out3               : destination FP registers
 */
| .macro FLDS_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3 |
| fld.s \out0, \src, 0 |
| fldx.s \out1, \src, \stride |
| fldx.s \out2, \src, \stride2 |
| fldx.s \out3, \src, \stride3 |
| .endm |
|
|
/*
 * FLDD_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
 *
 * Loads four double-precision FP registers from src, src + stride,
 * src + stride2 and src + stride3.
 *   src                      : base address register (not modified)
 *   stride, stride2, stride3 : registers holding byte offsets from src;
 *                              the macro does not derive one from another
 *   out0..out3               : destination FP registers
 */
| .macro FLDD_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3 |
| fld.d \out0, \src, 0 |
| fldx.d \out1, \src, \stride |
| fldx.d \out2, \src, \stride2 |
| fldx.d \out3, \src, \stride3 |
| .endm |
|
|
/*
 * LSX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
 *
 * Loads four 128-bit LSX vectors from src, src + stride, src + stride2 and
 * src + stride3.
 *   src                      : base address register (not modified)
 *   stride, stride2, stride3 : registers holding byte offsets from src;
 *                              the macro does not derive one from another
 *   out0..out3               : destination vector registers
 */
| .macro LSX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3 |
| vld \out0, \src, 0 |
| vldx \out1, \src, \stride |
| vldx \out2, \src, \stride2 |
| vldx \out3, \src, \stride3 |
| .endm |
|
|
/*
 * LASX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
 *
 * Loads four 256-bit LASX vectors from src, src + stride, src + stride2 and
 * src + stride3.
 *   src                      : base address register (not modified)
 *   stride, stride2, stride3 : registers holding byte offsets from src;
 *                              the macro does not derive one from another
 *   out0..out3               : destination vector registers
 */
| .macro LASX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3 |
| xvld \out0, \src, 0 |
| xvldx \out1, \src, \stride |
| xvldx \out2, \src, \stride2 |
| xvldx \out3, \src, \stride3 |
| .endm |
|
|
| |
| |
| |
| |
| |
/*
 * LSX_TRANSPOSE4x4_H in0..in3, out0..out3, tmp0, tmp1   (LSX)
 *
 * Transposes a 4x4 block of halfwords. Only the low halves of in0..in3 are
 * read (vilvl.h), and the transposed rows end up in the low 64 bits of
 * out0..out3. The upper 64 bits of the outputs are by-products of the
 * interleaves and should not be relied on.
 *
 * Clobbers tmp0 and tmp1. out0 and out2 are read again after being written,
 * so the four outputs must be distinct registers, and tmp0 must not be in2
 * or in3 (they are read after tmp0 is written).
 */
| .macro LSX_TRANSPOSE4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \ |
| tmp0, tmp1 |
/* pair up rows 0/1 and rows 2/3 element by element */
| vilvl.h \tmp0, \in1, \in0 |
| vilvl.h \tmp1, \in3, \in2 |
/* merge the pairs: out0 holds columns 0 and 1, out2 holds columns 2 and 3 */
| vilvl.w \out0, \tmp1, \tmp0 |
| vilvh.w \out2, \tmp1, \tmp0 |
/* bring the upper 64-bit halves down into out1 and out3 */
| vilvh.d \out1, \out0, \out0 |
| vilvh.d \out3, \out0, \out2 |
| .endm |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/*
 * LSX_TRANSPOSE4x4_W _in0.._in3, _out0.._out3, _tmp0, _tmp1   (LSX)
 *
 * Transposes a 4x4 block of 32-bit words held in four 128-bit vectors:
 * _out<i> receives element i of _in0, _in1, _in2, _in3 in that order.
 *
 * Clobbers _tmp0 and _tmp1; _out1 and _out3 double as intermediates.
 * _out1 is written before _in2/_in3 are read, so it must not be the same
 * register as _in2 or _in3. Using _out<i> == _in<i> (in place) is fine.
 */
| .macro LSX_TRANSPOSE4x4_W _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3, \ |
| _tmp0, _tmp1 |
|
|
/* word interleave of rows 0/1 and rows 2/3 (low and high halves) */
| vilvl.w \_tmp0, \_in1, \_in0 |
| vilvh.w \_out1, \_in1, \_in0 |
| vilvl.w \_tmp1, \_in3, \_in2 |
| vilvh.w \_out3, \_in3, \_in2 |
|
|
/* doubleword interleave completes the transpose; _out3 and _out1 are */
/* overwritten last because they are still sources for _out2 and _out3 */
| vilvl.d \_out0, \_tmp1, \_tmp0 |
| vilvl.d \_out2, \_out3, \_out1 |
| vilvh.d \_out3, \_out3, \_out1 |
| vilvh.d \_out1, \_tmp1, \_tmp0 |
| .endm |
|
|
| |
| |
| |
| |
| |
/*
 * LSX_TRANSPOSE8x8_H in0..in7, out0..out7, tmp0..tmp7   (LSX)
 *
 * Transposes an 8x8 block of halfwords held in eight 128-bit vectors:
 * out<i> receives element i of in0..in7 in that order.
 *
 * Clobbers tmp0..tmp7.
 * Register constraints that follow from the instruction order:
 *   - the inputs are read twice (low halves, then high halves), so no tmp
 *     register may be one of the inputs;
 *   - out0..out3 are written while tmp0..tmp7 still hold live data, so the
 *     outputs must not overlap the temporaries;
 *   - every input has been read before the first output is written, so the
 *     outputs may reuse input registers.
 */
| .macro LSX_TRANSPOSE8x8_H in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ |
| out2, out3, out4, out5, out6, out7, tmp0, tmp1, tmp2, \ |
| tmp3, tmp4, tmp5, tmp6, tmp7 |
/* low halves: interleave rows two apart (4/6, 5/7, 0/2, 1/3) */
| vilvl.h \tmp0, \in6, \in4 |
| vilvl.h \tmp1, \in7, \in5 |
| vilvl.h \tmp2, \in2, \in0 |
| vilvl.h \tmp3, \in3, \in1 |
|
|
/* second interleave: groups of four consecutive rows, columns 0..3 */
| vilvl.h \tmp4, \tmp1, \tmp0 |
| vilvh.h \tmp5, \tmp1, \tmp0 |
| vilvl.h \tmp6, \tmp3, \tmp2 |
| vilvh.h \tmp7, \tmp3, \tmp2 |
|
|
/* high halves of the inputs, same pairing (reuses tmp0..tmp3) */
| vilvh.h \tmp0, \in6, \in4 |
| vilvh.h \tmp1, \in7, \in5 |
| vilvh.h \tmp2, \in2, \in0 |
| vilvh.h \tmp3, \in3, \in1 |
|
|
/* combine rows 0-3 with rows 4-7 for columns 0..3 */
| vpickev.d \out0, \tmp4, \tmp6 |
| vpickod.d \out1, \tmp4, \tmp6 |
| vpickev.d \out2, \tmp5, \tmp7 |
| vpickod.d \out3, \tmp5, \tmp7 |
|
|
/* second interleave for columns 4..7 */
| vilvl.h \tmp4, \tmp1, \tmp0 |
| vilvh.h \tmp5, \tmp1, \tmp0 |
| vilvl.h \tmp6, \tmp3, \tmp2 |
| vilvh.h \tmp7, \tmp3, \tmp2 |
|
|
/* combine rows 0-3 with rows 4-7 for columns 4..7 */
| vpickev.d \out4, \tmp4, \tmp6 |
| vpickod.d \out5, \tmp4, \tmp6 |
| vpickev.d \out6, \tmp5, \tmp7 |
| vpickod.d \out7, \tmp5, \tmp7 |
| .endm |
|
|
| |
| |
| |
| |
| |
/*
 * LASX_TRANSPOSE16X8_B in0..in15, out0..out7, tmp0..tmp7   (LASX)
 *
 * Byte transpose of sixteen input rows into eight output rows: only the low
 * eight bytes of each input (per 128-bit lane) are read, and out<i> receives
 * byte i of in0..in15 in that order. The xvilv* instructions work on each
 * 128-bit lane independently, so the same rearrangement is applied in both
 * lanes of the 256-bit registers.
 *
 * Clobbers tmp0..tmp7; out0..out7 are also used as intermediates.
 * No tmp register may be one of the inputs, and the outputs must not overlap
 * the temporaries. All inputs are read before the first output is written,
 * so the outputs may reuse input registers.
 */
| .macro LASX_TRANSPOSE16X8_B in0, in1, in2, in3, in4, in5, in6, in7, \ |
| in8, in9, in10, in11, in12, in13, in14, in15, \ |
| out0, out1, out2, out3, out4, out5, out6, out7,\ |
| tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 |
/* stage 1: byte interleave of rows two apart */
| xvilvl.b \tmp0, \in2, \in0 |
| xvilvl.b \tmp1, \in3, \in1 |
| xvilvl.b \tmp2, \in6, \in4 |
| xvilvl.b \tmp3, \in7, \in5 |
| xvilvl.b \tmp4, \in10, \in8 |
| xvilvl.b \tmp5, \in11, \in9 |
| xvilvl.b \tmp6, \in14, \in12 |
| xvilvl.b \tmp7, \in15, \in13 |
/* stage 2: byte interleave again, giving groups of four consecutive rows */
| xvilvl.b \out0, \tmp1, \tmp0 |
| xvilvh.b \out1, \tmp1, \tmp0 |
| xvilvl.b \out2, \tmp3, \tmp2 |
| xvilvh.b \out3, \tmp3, \tmp2 |
| xvilvl.b \out4, \tmp5, \tmp4 |
| xvilvh.b \out5, \tmp5, \tmp4 |
| xvilvl.b \out6, \tmp7, \tmp6 |
| xvilvh.b \out7, \tmp7, \tmp6 |
/* stage 3: word interleave, giving groups of eight rows */
| xvilvl.w \tmp0, \out2, \out0 |
| xvilvh.w \tmp2, \out2, \out0 |
| xvilvl.w \tmp4, \out3, \out1 |
| xvilvh.w \tmp6, \out3, \out1 |
| xvilvl.w \tmp1, \out6, \out4 |
| xvilvh.w \tmp3, \out6, \out4 |
| xvilvl.w \tmp5, \out7, \out5 |
| xvilvh.w \tmp7, \out7, \out5 |
/* stage 4: doubleword interleave joins rows 0-7 with rows 8-15 */
| xvilvl.d \out0, \tmp1, \tmp0 |
| xvilvh.d \out1, \tmp1, \tmp0 |
| xvilvl.d \out2, \tmp3, \tmp2 |
| xvilvh.d \out3, \tmp3, \tmp2 |
| xvilvl.d \out4, \tmp5, \tmp4 |
| xvilvh.d \out5, \tmp5, \tmp4 |
| xvilvl.d \out6, \tmp7, \tmp6 |
| xvilvh.d \out7, \tmp7, \tmp6 |
| .endm |
|
|
| |
| |
| |
| |
| |
/*
 * LSX_TRANSPOSE16X8_B in0..in15, out0..out7, tmp0..tmp7   (LSX)
 *
 * Byte transpose of sixteen input rows into eight output rows: only the low
 * eight bytes of each input are read, and out<i> receives byte i of
 * in0..in15 in that order (sixteen bytes per output).
 *
 * Clobbers tmp0..tmp7; out0..out7 are also used as intermediates.
 * No tmp register may be one of the inputs, and the outputs must not overlap
 * the temporaries. All inputs are read before the first output is written,
 * so the outputs may reuse input registers.
 */
| .macro LSX_TRANSPOSE16X8_B in0, in1, in2, in3, in4, in5, in6, in7, \ |
| in8, in9, in10, in11, in12, in13, in14, in15, \ |
| out0, out1, out2, out3, out4, out5, out6, out7,\ |
| tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 |
/* stage 1: byte interleave of rows two apart */
| vilvl.b \tmp0, \in2, \in0 |
| vilvl.b \tmp1, \in3, \in1 |
| vilvl.b \tmp2, \in6, \in4 |
| vilvl.b \tmp3, \in7, \in5 |
| vilvl.b \tmp4, \in10, \in8 |
| vilvl.b \tmp5, \in11, \in9 |
| vilvl.b \tmp6, \in14, \in12 |
| vilvl.b \tmp7, \in15, \in13 |
|
|
/* stage 2: byte interleave again, giving groups of four consecutive rows */
| vilvl.b \out0, \tmp1, \tmp0 |
| vilvh.b \out1, \tmp1, \tmp0 |
| vilvl.b \out2, \tmp3, \tmp2 |
| vilvh.b \out3, \tmp3, \tmp2 |
| vilvl.b \out4, \tmp5, \tmp4 |
| vilvh.b \out5, \tmp5, \tmp4 |
| vilvl.b \out6, \tmp7, \tmp6 |
| vilvh.b \out7, \tmp7, \tmp6 |
/* stage 3: word interleave, giving groups of eight rows */
| vilvl.w \tmp0, \out2, \out0 |
| vilvh.w \tmp2, \out2, \out0 |
| vilvl.w \tmp4, \out3, \out1 |
| vilvh.w \tmp6, \out3, \out1 |
| vilvl.w \tmp1, \out6, \out4 |
| vilvh.w \tmp3, \out6, \out4 |
| vilvl.w \tmp5, \out7, \out5 |
| vilvh.w \tmp7, \out7, \out5 |
/* stage 4: doubleword interleave joins rows 0-7 with rows 8-15 */
| vilvl.d \out0, \tmp1, \tmp0 |
| vilvh.d \out1, \tmp1, \tmp0 |
| vilvl.d \out2, \tmp3, \tmp2 |
| vilvh.d \out3, \tmp3, \tmp2 |
| vilvl.d \out4, \tmp5, \tmp4 |
| vilvh.d \out5, \tmp5, \tmp4 |
| vilvl.d \out6, \tmp7, \tmp6 |
| vilvh.d \out7, \tmp7, \tmp6 |
| .endm |
|
|
| |
| |
| |
| |
| |
/*
 * LASX_TRANSPOSE4x4_H in0..in3, out0..out3, tmp0, tmp1   (LASX)
 *
 * LASX counterpart of LSX_TRANSPOSE4x4_H: transposes a 4x4 block of halfwords
 * taken from the low 64 bits of each 128-bit lane (the xvilv* instructions
 * treat the two lanes independently). The transposed rows land in the low
 * 64 bits of each lane of out0..out3. The upper 64 bits of each lane are
 * by-products and should not be relied on.
 *
 * Clobbers tmp0 and tmp1. out0 and out2 are read again after being written,
 * so the four outputs must be distinct registers, and tmp0 must not be in2
 * or in3.
 */
| .macro LASX_TRANSPOSE4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \ |
| tmp0, tmp1 |
/* pair up rows 0/1 and rows 2/3 element by element */
| xvilvl.h \tmp0, \in1, \in0 |
| xvilvl.h \tmp1, \in3, \in2 |
/* merge the pairs: out0 holds columns 0 and 1, out2 holds columns 2 and 3 */
| xvilvl.w \out0, \tmp1, \tmp0 |
| xvilvh.w \out2, \tmp1, \tmp0 |
/* bring the upper 64-bit halves down into out1 and out3 */
| xvilvh.d \out1, \out0, \out0 |
| xvilvh.d \out3, \out0, \out2 |
| .endm |
|
|
| |
| |
| |
| |
| |
/*
 * LASX_TRANSPOSE4x8_H in0..in3, out0..out3, tmp0, tmp1   (LASX)
 *
 * Halfword transpose built from two interleave passes over the low halves of
 * in0..in3 (per 128-bit lane). After the final doubleword interleaves each
 * output has the same source as both operands, so its transposed row is
 * duplicated in both 64-bit halves of every lane.
 *
 * Clobbers tmp0 and tmp1; out2 and out3 are used as intermediates and are
 * overwritten last. out0/out1 must therefore differ from out2/out3, out2
 * must not be tmp0 or tmp1, and tmp0 must not be in1 or in3.
 */
| .macro LASX_TRANSPOSE4x8_H in0, in1, in2, in3, out0, out1, out2, out3, \ |
| tmp0, tmp1 |
/* interleave rows 0/2 and rows 1/3, then interleave the results */
| xvilvl.h \tmp0, \in2, \in0 |
| xvilvl.h \tmp1, \in3, \in1 |
| xvilvl.h \out2, \tmp1, \tmp0 |
| xvilvh.h \out3, \tmp1, \tmp0 |
|
|
/* split each intermediate into its low / high 64-bit half, duplicated */
| xvilvl.d \out0, \out2, \out2 |
| xvilvh.d \out1, \out2, \out2 |
| xvilvl.d \out2, \out3, \out3 |
| xvilvh.d \out3, \out3, \out3 |
| .endm |
|
|
| |
| |
| |
| |
| |
/*
 * LASX_TRANSPOSE8x8_H in0..in7, out0..out7, tmp0..tmp7   (LASX)
 *
 * LASX counterpart of LSX_TRANSPOSE8x8_H: transposes an 8x8 block of
 * halfwords in each 128-bit lane (the instructions used operate on the two
 * lanes independently), so two 8x8 blocks are transposed at once.
 *
 * Clobbers tmp0..tmp7.
 * Register constraints that follow from the instruction order:
 *   - the inputs are read twice (low halves, then high halves), so no tmp
 *     register may be one of the inputs;
 *   - out0..out3 are written while tmp0..tmp7 still hold live data, so the
 *     outputs must not overlap the temporaries;
 *   - every input has been read before the first output is written, so the
 *     outputs may reuse input registers.
 */
| .macro LASX_TRANSPOSE8x8_H in0, in1, in2, in3, in4, in5, in6, in7, \ |
| out0, out1, out2, out3, out4, out5, out6, out7, \ |
| tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 |
/* low halves: interleave rows two apart (4/6, 5/7, 0/2, 1/3) */
| xvilvl.h \tmp0, \in6, \in4 |
| xvilvl.h \tmp1, \in7, \in5 |
| xvilvl.h \tmp2, \in2, \in0 |
| xvilvl.h \tmp3, \in3, \in1 |
|
|
/* second interleave: groups of four consecutive rows, columns 0..3 */
| xvilvl.h \tmp4, \tmp1, \tmp0 |
| xvilvh.h \tmp5, \tmp1, \tmp0 |
| xvilvl.h \tmp6, \tmp3, \tmp2 |
| xvilvh.h \tmp7, \tmp3, \tmp2 |
|
|
/* high halves of the inputs, same pairing (reuses tmp0..tmp3) */
| xvilvh.h \tmp0, \in6, \in4 |
| xvilvh.h \tmp1, \in7, \in5 |
| xvilvh.h \tmp2, \in2, \in0 |
| xvilvh.h \tmp3, \in3, \in1 |
|
|
/* combine rows 0-3 with rows 4-7 for columns 0..3 */
| xvpickev.d \out0, \tmp4, \tmp6 |
| xvpickod.d \out1, \tmp4, \tmp6 |
| xvpickev.d \out2, \tmp5, \tmp7 |
| xvpickod.d \out3, \tmp5, \tmp7 |
|
|
/* second interleave for columns 4..7 */
| xvilvl.h \tmp4, \tmp1, \tmp0 |
| xvilvh.h \tmp5, \tmp1, \tmp0 |
| xvilvl.h \tmp6, \tmp3, \tmp2 |
| xvilvh.h \tmp7, \tmp3, \tmp2 |
|
|
/* combine rows 0-3 with rows 4-7 for columns 4..7 */
| xvpickev.d \out4, \tmp4, \tmp6 |
| xvpickod.d \out5, \tmp4, \tmp6 |
| xvpickev.d \out6, \tmp5, \tmp7 |
| xvpickod.d \out7, \tmp5, \tmp7 |
| .endm |
|
|
| |
| |
| |
| |
| |
/*
 * LASX_TRANSPOSE2x4x4_H in0..in3, out0..out3, tmp0, tmp1, tmp2   (LASX)
 *
 * Interleave-based halfword transpose that consumes both the low and the
 * high halves of in0..in3 (per 128-bit lane) and packs the results into
 * out0..out3 with three interleave passes (halfword, word, doubleword).
 *
 * NOTE(review): unlike the other transposes in this file the first pass
 * passes in0/in1 and in2/in3 in the opposite operand order, and the final
 * pass writes out0, out2, out1, out3 - so the element order inside each
 * output and the mapping of outputs to columns differ from the plain 4x4
 * macros. Confirm the exact layout against the callers.
 *
 * Clobbers tmp0..tmp2; out1, out2 and out3 are used as intermediates.
 * out1 is written before in2/in3 are read, so it must not be in2 or in3;
 * tmp1 must not be any of the inputs.
 */
| .macro LASX_TRANSPOSE2x4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \ |
| tmp0, tmp1, tmp2 |
/* pass 1: halfword interleave of rows 0/1 and rows 2/3, high and low parts */
| xvilvh.h \tmp1, \in0, \in1 |
| xvilvl.h \out1, \in0, \in1 |
| xvilvh.h \tmp0, \in2, \in3 |
| xvilvl.h \out3, \in2, \in3 |
|
|
/* pass 2a: word interleave of the low-part results */
| xvilvh.w \tmp2, \out3, \out1 |
| xvilvl.w \out3, \out3, \out1 |
|
|
/* pass 2b: word interleave of the high-part results */
| xvilvl.w \out2, \tmp0, \tmp1 |
| xvilvh.w \tmp1, \tmp0, \tmp1 |
|
|
/* pass 3: doubleword interleave pairs low-part and high-part results */
| xvilvh.d \out0, \out2, \out3 |
| xvilvl.d \out2, \out2, \out3 |
| xvilvh.d \out1, \tmp1, \tmp2 |
| xvilvl.d \out3, \tmp1, \tmp2 |
| .endm |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/*
 * LASX_TRANSPOSE4x4_W _in0.._in3, _out0.._out3, _tmp0, _tmp1   (LASX)
 *
 * LASX counterpart of LSX_TRANSPOSE4x4_W: transposes a 4x4 block of 32-bit
 * words in each 128-bit lane (the xvilv* instructions treat the two lanes
 * independently), i.e. two 4x4 blocks at once.
 *
 * Clobbers _tmp0 and _tmp1; _out1 and _out3 double as intermediates.
 * _out1 is written before _in2/_in3 are read, so it must not be the same
 * register as _in2 or _in3. Using _out<i> == _in<i> (in place) is fine.
 */
| .macro LASX_TRANSPOSE4x4_W _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3, \ |
| _tmp0, _tmp1 |
|
|
/* word interleave of rows 0/1 and rows 2/3 (low and high halves) */
| xvilvl.w \_tmp0, \_in1, \_in0 |
| xvilvh.w \_out1, \_in1, \_in0 |
| xvilvl.w \_tmp1, \_in3, \_in2 |
| xvilvh.w \_out3, \_in3, \_in2 |
|
|
/* doubleword interleave completes the transpose; _out3 and _out1 are */
/* overwritten last because they are still sources for _out2 and _out3 */
| xvilvl.d \_out0, \_tmp1, \_tmp0 |
| xvilvl.d \_out2, \_out3, \_out1 |
| xvilvh.d \_out3, \_out3, \_out1 |
| xvilvh.d \_out1, \_tmp1, \_tmp0 |
| .endm |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/*
 * LASX_TRANSPOSE8x8_W _in0.._in7, _out0.._out7, _tmp0.._tmp3   (LASX)
 *
 * Transposes an 8x8 block of 32-bit words held in eight 256-bit vectors:
 * _out<i> receives element i of _in0.._in7 in that order.
 *
 * Steps: per-lane 4x4 word transposes of rows 0-3 and rows 4-7 (two
 * interleave passes each), followed by xvpermi.q lane shuffles that gather
 * the low lanes into _out0.._out3 and the high lanes into _out4.._out7.
 *
 * Uses the xmov macro defined in this file. Clobbers _tmp0.._tmp3.
 * _out0.._out3 are written before _in4.._in7 are read, so they must not be
 * any of _in4.._in7; the temporaries must not overlap inputs or outputs.
 * Using _out<i> == _in<i> (in place) is fine.
 */
| .macro LASX_TRANSPOSE8x8_W _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7,\ |
| _out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7,\ |
| _tmp0, _tmp1, _tmp2, _tmp3 |
/* rows 0-3: per-lane 4x4 word transpose into _out0.._out3 */
| xvilvl.w \_tmp0, \_in2, \_in0 |
| xvilvl.w \_tmp1, \_in3, \_in1 |
| xvilvh.w \_tmp2, \_in2, \_in0 |
| xvilvh.w \_tmp3, \_in3, \_in1 |
| xvilvl.w \_out0, \_tmp1, \_tmp0 |
| xvilvh.w \_out1, \_tmp1, \_tmp0 |
| xvilvl.w \_out2, \_tmp3, \_tmp2 |
| xvilvh.w \_out3, \_tmp3, \_tmp2 |
|
|
/* rows 4-7: per-lane 4x4 word transpose into _out4.._out7 */
| xvilvl.w \_tmp0, \_in6, \_in4 |
| xvilvl.w \_tmp1, \_in7, \_in5 |
| xvilvh.w \_tmp2, \_in6, \_in4 |
| xvilvh.w \_tmp3, \_in7, \_in5 |
| xvilvl.w \_out4, \_tmp1, \_tmp0 |
| xvilvh.w \_out5, \_tmp1, \_tmp0 |
| xvilvl.w \_out6, \_tmp3, \_tmp2 |
| xvilvh.w \_out7, \_tmp3, \_tmp2 |
|
|
/* keep copies of _out0.._out3: their high lanes are still needed after */
/* the first four lane shuffles overwrite them */
| xmov \_tmp0, \_out0 |
| xmov \_tmp1, \_out1 |
| xmov \_tmp2, \_out2 |
| xmov \_tmp3, \_out3 |
/* 0x02: keep own low lane, take the low lane of _out4.._out7 as high lane */
| xvpermi.q \_out0, \_out4, 0x02 |
| xvpermi.q \_out1, \_out5, 0x02 |
| xvpermi.q \_out2, \_out6, 0x02 |
| xvpermi.q \_out3, \_out7, 0x02 |
/* 0x31: take the saved high lane as low lane, keep own high lane */
| xvpermi.q \_out4, \_tmp0, 0x31 |
| xvpermi.q \_out5, \_tmp1, 0x31 |
| xvpermi.q \_out6, \_tmp2, 0x31 |
| xvpermi.q \_out7, \_tmp3, 0x31 |
| .endm |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/*
 * LASX_TRANSPOSE4x4_D _in0.._in3, _out0.._out3, _tmp0, _tmp1   (LASX)
 *
 * Transposes a 4x4 block of 64-bit doublewords held in four 256-bit vectors:
 * _out<i> receives element i of _in0, _in1, _in2, _in3 in that order.
 *
 * Steps: per-lane doubleword interleaves of rows 0/1 and rows 2/3, then
 * xvpermi.q lane shuffles to bring the matching 128-bit lanes together.
 *
 * Clobbers _tmp0 and _tmp1. _out1 is written before _in2/_in3 are read, so
 * it must not be _in2 or _in3; the temporaries must not overlap inputs or
 * outputs. Using _out<i> == _in<i> (in place) is fine.
 */
| .macro LASX_TRANSPOSE4x4_D _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3, \ |
| _tmp0, _tmp1 |
/* per-lane doubleword interleave of rows 0/1 and rows 2/3 */
| xvilvl.d \_tmp0, \_in1, \_in0 |
| xvilvh.d \_out1, \_in1, \_in0 |
| xvilvh.d \_tmp1, \_in3, \_in2 |
| xvilvl.d \_out2, \_in3, \_in2 |
|
|
/* working copies: _tmp0/_tmp1 are still needed as shuffle sources below */
| xvor.v \_out0, \_tmp0, \_tmp0 |
| xvor.v \_out3, \_tmp1, \_tmp1 |
|
|
/* 0x02: keep own low lane, take the source's low lane as high lane */
/* 0x31: take the source's high lane as low lane, keep own high lane */
| xvpermi.q \_out0, \_out2, 0x02 |
| xvpermi.q \_out2, \_tmp0, 0x31 |
| xvpermi.q \_out3, \_out1, 0x31 |
| xvpermi.q \_out1, \_tmp1, 0x02 |
| .endm |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/*
 * LSX_BUTTERFLY_4_B _in0.._in3, _out0.._out3   (LSX, byte elements)
 *
 * Four-point butterfly:
 *   _out0 = _in0 + _in3      _out3 = _in0 - _in3
 *   _out1 = _in1 + _in2      _out2 = _in1 - _in2
 *
 * _out0 and _out1 are written before the subtractions re-read the inputs,
 * so they must not share a register with any input.
 */
| .macro LSX_BUTTERFLY_4_B _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
| vadd.b \_out0, \_in0, \_in3 |
| vadd.b \_out1, \_in1, \_in2 |
| vsub.b \_out2, \_in1, \_in2 |
| vsub.b \_out3, \_in0, \_in3 |
| .endm |
/*
 * LSX_BUTTERFLY_4_H _in0.._in3, _out0.._out3   (LSX, halfword elements)
 *
 * Four-point butterfly:
 *   _out0 = _in0 + _in3      _out3 = _in0 - _in3
 *   _out1 = _in1 + _in2      _out2 = _in1 - _in2
 *
 * _out0 and _out1 are written before the subtractions re-read the inputs,
 * so they must not share a register with any input.
 */
| .macro LSX_BUTTERFLY_4_H _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
| vadd.h \_out0, \_in0, \_in3 |
| vadd.h \_out1, \_in1, \_in2 |
| vsub.h \_out2, \_in1, \_in2 |
| vsub.h \_out3, \_in0, \_in3 |
| .endm |
/*
 * LSX_BUTTERFLY_4_W _in0.._in3, _out0.._out3   (LSX, word elements)
 *
 * Four-point butterfly:
 *   _out0 = _in0 + _in3      _out3 = _in0 - _in3
 *   _out1 = _in1 + _in2      _out2 = _in1 - _in2
 *
 * _out0 and _out1 are written before the subtractions re-read the inputs,
 * so they must not share a register with any input.
 */
| .macro LSX_BUTTERFLY_4_W _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
| vadd.w \_out0, \_in0, \_in3 |
| vadd.w \_out1, \_in1, \_in2 |
| vsub.w \_out2, \_in1, \_in2 |
| vsub.w \_out3, \_in0, \_in3 |
| .endm |
/*
 * LSX_BUTTERFLY_4_D _in0.._in3, _out0.._out3   (LSX, doubleword elements)
 *
 * Four-point butterfly:
 *   _out0 = _in0 + _in3      _out3 = _in0 - _in3
 *   _out1 = _in1 + _in2      _out2 = _in1 - _in2
 *
 * _out0 and _out1 are written before the subtractions re-read the inputs,
 * so they must not share a register with any input.
 */
| .macro LSX_BUTTERFLY_4_D _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
| vadd.d \_out0, \_in0, \_in3 |
| vadd.d \_out1, \_in1, \_in2 |
| vsub.d \_out2, \_in1, \_in2 |
| vsub.d \_out3, \_in0, \_in3 |
| .endm |
|
|
/*
 * LASX_BUTTERFLY_4_B _in0.._in3, _out0.._out3   (LASX, byte elements)
 *
 * Four-point butterfly:
 *   _out0 = _in0 + _in3      _out3 = _in0 - _in3
 *   _out1 = _in1 + _in2      _out2 = _in1 - _in2
 *
 * _out0 and _out1 are written before the subtractions re-read the inputs,
 * so they must not share a register with any input.
 */
| .macro LASX_BUTTERFLY_4_B _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
| xvadd.b \_out0, \_in0, \_in3 |
| xvadd.b \_out1, \_in1, \_in2 |
| xvsub.b \_out2, \_in1, \_in2 |
| xvsub.b \_out3, \_in0, \_in3 |
| .endm |
/*
 * LASX_BUTTERFLY_4_H _in0.._in3, _out0.._out3   (LASX, halfword elements)
 *
 * Four-point butterfly:
 *   _out0 = _in0 + _in3      _out3 = _in0 - _in3
 *   _out1 = _in1 + _in2      _out2 = _in1 - _in2
 *
 * _out0 and _out1 are written before the subtractions re-read the inputs,
 * so they must not share a register with any input.
 */
| .macro LASX_BUTTERFLY_4_H _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
| xvadd.h \_out0, \_in0, \_in3 |
| xvadd.h \_out1, \_in1, \_in2 |
| xvsub.h \_out2, \_in1, \_in2 |
| xvsub.h \_out3, \_in0, \_in3 |
| .endm |
/*
 * LASX_BUTTERFLY_4_W _in0.._in3, _out0.._out3   (LASX, word elements)
 *
 * Four-point butterfly:
 *   _out0 = _in0 + _in3      _out3 = _in0 - _in3
 *   _out1 = _in1 + _in2      _out2 = _in1 - _in2
 *
 * _out0 and _out1 are written before the subtractions re-read the inputs,
 * so they must not share a register with any input.
 */
| .macro LASX_BUTTERFLY_4_W _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
| xvadd.w \_out0, \_in0, \_in3 |
| xvadd.w \_out1, \_in1, \_in2 |
| xvsub.w \_out2, \_in1, \_in2 |
| xvsub.w \_out3, \_in0, \_in3 |
| .endm |
/*
 * LASX_BUTTERFLY_4_D _in0.._in3, _out0.._out3   (LASX, doubleword elements)
 *
 * Four-point butterfly:
 *   _out0 = _in0 + _in3      _out3 = _in0 - _in3
 *   _out1 = _in1 + _in2      _out2 = _in1 - _in2
 *
 * _out0 and _out1 are written before the subtractions re-read the inputs,
 * so they must not share a register with any input.
 */
| .macro LASX_BUTTERFLY_4_D _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
| xvadd.d \_out0, \_in0, \_in3 |
| xvadd.d \_out1, \_in1, \_in2 |
| xvsub.d \_out2, \_in1, \_in2 |
| xvsub.d \_out3, \_in0, \_in3 |
| .endm |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/*
 * LSX_BUTTERFLY_8_B _in0.._in7, _out0.._out7   (LSX, byte elements)
 *
 * Eight-point butterfly; for i = 0..3:
 *   _out<i>   = _in<i> + _in<7-i>
 *   _out<7-i> = _in<i> - _in<7-i>
 *
 * _out0.._out3 are written before the subtractions re-read every input,
 * so they must not share a register with any input.
 */
| .macro LSX_BUTTERFLY_8_B _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
| _out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
| vadd.b \_out0, \_in0, \_in7 |
| vadd.b \_out1, \_in1, \_in6 |
| vadd.b \_out2, \_in2, \_in5 |
| vadd.b \_out3, \_in3, \_in4 |
| vsub.b \_out4, \_in3, \_in4 |
| vsub.b \_out5, \_in2, \_in5 |
| vsub.b \_out6, \_in1, \_in6 |
| vsub.b \_out7, \_in0, \_in7 |
| .endm |
|
|
/*
 * LSX_BUTTERFLY_8_H _in0.._in7, _out0.._out7   (LSX, halfword elements)
 *
 * Eight-point butterfly; for i = 0..3:
 *   _out<i>   = _in<i> + _in<7-i>
 *   _out<7-i> = _in<i> - _in<7-i>
 *
 * _out0.._out3 are written before the subtractions re-read every input,
 * so they must not share a register with any input.
 */
| .macro LSX_BUTTERFLY_8_H _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
| _out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
| vadd.h \_out0, \_in0, \_in7 |
| vadd.h \_out1, \_in1, \_in6 |
| vadd.h \_out2, \_in2, \_in5 |
| vadd.h \_out3, \_in3, \_in4 |
| vsub.h \_out4, \_in3, \_in4 |
| vsub.h \_out5, \_in2, \_in5 |
| vsub.h \_out6, \_in1, \_in6 |
| vsub.h \_out7, \_in0, \_in7 |
| .endm |
|
|
/*
 * LSX_BUTTERFLY_8_W _in0.._in7, _out0.._out7   (LSX, word elements)
 *
 * Eight-point butterfly; for i = 0..3:
 *   _out<i>   = _in<i> + _in<7-i>
 *   _out<7-i> = _in<i> - _in<7-i>
 *
 * _out0.._out3 are written before the subtractions re-read every input,
 * so they must not share a register with any input.
 */
| .macro LSX_BUTTERFLY_8_W _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
| _out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
| vadd.w \_out0, \_in0, \_in7 |
| vadd.w \_out1, \_in1, \_in6 |
| vadd.w \_out2, \_in2, \_in5 |
| vadd.w \_out3, \_in3, \_in4 |
| vsub.w \_out4, \_in3, \_in4 |
| vsub.w \_out5, \_in2, \_in5 |
| vsub.w \_out6, \_in1, \_in6 |
| vsub.w \_out7, \_in0, \_in7 |
| .endm |
|
|
/*
 * LSX_BUTTERFLY_8_D _in0.._in7, _out0.._out7   (LSX, doubleword elements)
 *
 * Eight-point butterfly; for i = 0..3:
 *   _out<i>   = _in<i> + _in<7-i>
 *   _out<7-i> = _in<i> - _in<7-i>
 *
 * _out0.._out3 are written before the subtractions re-read every input,
 * so they must not share a register with any input.
 */
| .macro LSX_BUTTERFLY_8_D _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
| _out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
| vadd.d \_out0, \_in0, \_in7 |
| vadd.d \_out1, \_in1, \_in6 |
| vadd.d \_out2, \_in2, \_in5 |
| vadd.d \_out3, \_in3, \_in4 |
| vsub.d \_out4, \_in3, \_in4 |
| vsub.d \_out5, \_in2, \_in5 |
| vsub.d \_out6, \_in1, \_in6 |
| vsub.d \_out7, \_in0, \_in7 |
| .endm |
|
|
/*
 * LASX_BUTTERFLY_8_B _in0.._in7, _out0.._out7   (LASX, byte elements)
 *
 * Eight-point butterfly; for i = 0..3:
 *   _out<i>   = _in<i> + _in<7-i>
 *   _out<7-i> = _in<i> - _in<7-i>
 *
 * _out0.._out3 are written before the subtractions re-read every input,
 * so they must not share a register with any input.
 */
| .macro LASX_BUTTERFLY_8_B _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
| _out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
| xvadd.b \_out0, \_in0, \_in7 |
| xvadd.b \_out1, \_in1, \_in6 |
| xvadd.b \_out2, \_in2, \_in5 |
| xvadd.b \_out3, \_in3, \_in4 |
| xvsub.b \_out4, \_in3, \_in4 |
| xvsub.b \_out5, \_in2, \_in5 |
| xvsub.b \_out6, \_in1, \_in6 |
| xvsub.b \_out7, \_in0, \_in7 |
| .endm |
|
|
/*
 * LASX_BUTTERFLY_8_H _in0.._in7, _out0.._out7   (LASX, halfword elements)
 *
 * Eight-point butterfly; for i = 0..3:
 *   _out<i>   = _in<i> + _in<7-i>
 *   _out<7-i> = _in<i> - _in<7-i>
 *
 * _out0.._out3 are written before the subtractions re-read every input,
 * so they must not share a register with any input.
 */
| .macro LASX_BUTTERFLY_8_H _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
| _out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
| xvadd.h \_out0, \_in0, \_in7 |
| xvadd.h \_out1, \_in1, \_in6 |
| xvadd.h \_out2, \_in2, \_in5 |
| xvadd.h \_out3, \_in3, \_in4 |
| xvsub.h \_out4, \_in3, \_in4 |
| xvsub.h \_out5, \_in2, \_in5 |
| xvsub.h \_out6, \_in1, \_in6 |
| xvsub.h \_out7, \_in0, \_in7 |
| .endm |
|
|
/*
 * LASX_BUTTERFLY_8_W _in0.._in7, _out0.._out7   (LASX, word elements)
 *
 * Eight-point butterfly; for i = 0..3:
 *   _out<i>   = _in<i> + _in<7-i>
 *   _out<7-i> = _in<i> - _in<7-i>
 *
 * _out0.._out3 are written before the subtractions re-read every input,
 * so they must not share a register with any input.
 */
| .macro LASX_BUTTERFLY_8_W _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
| _out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
| xvadd.w \_out0, \_in0, \_in7 |
| xvadd.w \_out1, \_in1, \_in6 |
| xvadd.w \_out2, \_in2, \_in5 |
| xvadd.w \_out3, \_in3, \_in4 |
| xvsub.w \_out4, \_in3, \_in4 |
| xvsub.w \_out5, \_in2, \_in5 |
| xvsub.w \_out6, \_in1, \_in6 |
| xvsub.w \_out7, \_in0, \_in7 |
| .endm |
|
|