| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
// ff_imdct_half_neon: pre-rotation of the input, a call to ff_fft_calc_neon
// on the output buffer, then an in-place post-rotation of that buffer.
//
// Registers on entry, as this code uses them:
//   x0  pointer to a context block. Read here: a 32-bit value at offset 28
//       (used as a shift count), a pointer at offset 32 (table of floats)
//       and a pointer at offset 8 (table of 32-bit words).
//   x1  output buffer, addressed in 8-byte (two-float) elements.
//   x2  input buffer of 32-bit floats.
// NOTE(review): offsets 28, 32 and 8 presumably are the mdct_bits, tcos and
// revtab fields of the FFT context. The struct is not visible in this file,
// confirm against its C definition.
//
// Only v0-v7 and v16-v25 are written, so no callee-saved vector register
// is touched. x19 and x20 are saved and restored around the body.
| | function ff_imdct_half_neon, export=1 |
// Prologue: reserve 32 bytes, x19 and x20 at sp, x30 at sp+16.
// AARCH64_SIGN_LINK_REGISTER is a macro defined outside this file
// (by its name it signs x30 for pointer authentication, confirm).
| | stp x19, x20, [sp, #-32]! |
| | AARCH64_SIGN_LINK_REGISTER |
| | str x30, [sp, #16] |
// n = 1 << (value at offset 28). x14 = n/4 is the loop counter and is
// decremented by 2 per pass.
// x7 = x2 + 2n - 16 bytes, i.e. the last four floats of a 2n-byte input
// span. It is walked downwards using the -16 stride kept in x12.
| | mov x12, #1 |
| | ldr w14, [x0, #28] |
| | ldr x4, [x0, #32] |
| | ldr x3, [x0, #8] |
| | lsl x12, x12, x14 |
| | lsr x14, x12, #2 |
| | add x7, x2, x12, lsl #1 |
| | mov x12, #-16 |
| | sub x7, x7, #16 |
| |
|
// Prime the software pipeline. Each ld2 reads four consecutive floats and
// de-interleaves them: the first register receives elements 0 and 2, the
// second receives elements 1 and 3.
// Of the two input loads only v17 (lane-swapped by rev64) and v0 are
// consumed in this loop, v16 and v1 are loaded but never read.
// v2 and v3 are the even and odd entries of the table at x4
// (presumably cosine and sine factors, confirm).
| | ld2 {v16.2s,v17.2s}, [x7], x12 |
| | ld2 {v0.2s,v1.2s}, [x2], #16 |
| | rev64 v17.2s, v17.2s |
| | ld2 {v2.2s,v3.2s}, [x4], #16 |
| | fmul v6.2s, v17.2s, v2.2s |
| | fmul v7.2s, v0.2s, v2.2s |
// Pre-rotation loop. Per pass and per lane:
//   v4 = v17*v2 - v0*v3
//   v5 = v17*v3 + v0*v2
// w6 is one 32-bit word from the table at x3 holding two 16-bit output
// indices: the low half ends up in x6, the high half in x8, each scaled
// by 8 and added to the output base x1.
// The flags set by subs are still valid at b.eq because no instruction in
// between writes the flags. The loop only ends when the counter reaches
// exactly zero, so n/4 has to be a positive even number.
| | 1: |
| | subs x14, x14, #2 |
| | ldr w6, [x3], #4 |
| | fmul v4.2s, v0.2s, v3.2s |
| | fmul v5.2s, v17.2s, v3.2s |
| | fsub v4.2s, v6.2s, v4.2s |
| | fadd v5.2s, v5.2s, v7.2s |
| | ubfm x8, x6, #16, #31 |
| | ubfm x6, x6, #0, #15 |
| | add x8, x1, x8, lsl #3 |
| | add x6, x1, x6, lsl #3 |
| | b.eq 2f |
// Not the last pass: fetch and start multiplying the next operands first,
// then store lane 0 of (v4, v5) at the low-index slot and lane 1 at the
// high-index slot.
| | ld2 {v16.2s,v17.2s}, [x7], x12 |
| | ld2 {v0.2s,v1.2s}, [x2], #16 |
| | rev64 v17.2s, v17.2s |
| | ld2 {v2.2s,v3.2s}, [x4], #16 |
| | fmul v6.2s, v17.2s, v2.2s |
| | fmul v7.2s, v0.2s, v2.2s |
| | st2 {v4.s,v5.s}[0], [x6] |
| | st2 {v4.s,v5.s}[1], [x8] |
| | b 1b |
// Last pass: only the two stores remain.
| | 2: |
| | st2 {v4.s,v5.s}[0], [x6] |
| | st2 {v4.s,v5.s}[1], [x8] |
| |
|
// Keep the context and output pointers in callee-saved registers, then
// call the FFT with x0 and x1 still holding context and output buffer.
| | mov x19, x0 |
| | mov x20, x1 |
| | bl X(ff_fft_calc_neon) |
| |
|
// Post-rotation setup. The argument and scratch registers are not assumed
// to survive the call, so the shift count and the table pointer are
// reloaded through x19. x14 = n/8 is the new loop counter.
| | mov x12, #1 |
| | ldr w14, [x19, #28] |
| | ldr x4, [x19, #32] |
| | lsl x12, x12, x14 |
| | lsr x14, x12, #3 |
| |
|
// Start at the midpoint and work outwards (n/8 * 8 = n bytes in):
//   x4  table pointer, read upwards from table + n bytes
//   x1  table pointer, read downwards from 16 bytes below that
//   x6  data pointer, read upwards from output + n bytes
//   x3  data pointer, read downwards from 16 bytes below that
| | add x4, x4, x14, lsl #3 |
| | add x6, x20, x14, lsl #3 |
| | sub x1, x4, #16 |
| | sub x3, x6, #16 |
| |
|
// x7 = downward stride. x8 and x0 are the store pointers that trail the
// upward (x6) and downward (x3) load pointers.
| | mov x7, #-16 |
| | mov x8, x6 |
| | mov x0, x3 |
| |
|
// Prime the pipeline for the post-rotation loop.
| | ld2 {v0.2s,v1.2s}, [x3], x7 |
| | ld2 {v20.2s,v21.2s},[x6], #16 |
| | ld2 {v16.2s,v17.2s},[x1], x7 |
// Post-rotation loop. Per pass and per lane:
//   v4 = v1*v17  - v0*v16         v7 = v0*v17  + v1*v16
//   v6 = v21*v19 - v20*v18        v5 = v20*v19 + v21*v18
// v5 and v7 are lane-swapped and cross over: v5 (computed from the upper
// data) is stored together with v4 in the lower block, v7 (computed from
// the lower data) together with v6 in the upper block.
// As above, the loop ends only when x14 hits exactly zero.
| | 3: |
| | subs x14, x14, #2 |
| | fmul v7.2s, v0.2s, v17.2s |
| | ld2 {v18.2s,v19.2s},[x4], #16 |
| | fmul v4.2s, v1.2s, v17.2s |
| | fmul v6.2s, v21.2s, v19.2s |
| | fmul v5.2s, v20.2s, v19.2s |
| | fmul v22.2s, v1.2s, v16.2s |
| | fmul v23.2s, v21.2s, v18.2s |
| | fmul v24.2s, v0.2s, v16.2s |
| | fmul v25.2s, v20.2s, v18.2s |
| | fadd v7.2s, v7.2s, v22.2s |
| | fadd v5.2s, v5.2s, v23.2s |
| | fsub v4.2s, v4.2s, v24.2s |
| | fsub v6.2s, v6.2s, v25.2s |
| | b.eq 4f |
// Not the last pass: issue the next loads before storing, the loads run
// ahead of the store pointers so the data is read before it is overwritten.
| | ld2 {v0.2s,v1.2s}, [x3], x7 |
| | ld2 {v20.2s,v21.2s},[x6], #16 |
| | ld2 {v16.2s,v17.2s},[x1], x7 |
| | rev64 v5.2s, v5.2s |
| | rev64 v7.2s, v7.2s |
| | st2 {v4.2s,v5.2s}, [x0], x7 |
| | st2 {v6.2s,v7.2s}, [x8], #16 |
| | b 3b |
// Last pass: final swap and stores, no pointer update needed.
| | 4: |
| | rev64 v5.2s, v5.2s |
| | rev64 v7.2s, v7.2s |
| | st2 {v4.2s,v5.2s}, [x0] |
| | st2 {v6.2s,v7.2s}, [x8] |
| |
|
// Epilogue: reload x30, run the validation macro (defined elsewhere),
// restore x19 and x20 and release the 32-byte frame.
| | ldr x30, [sp, #16] |
| | AARCH64_VALIDATE_LINK_REGISTER |
| | ldp x19, x20, [sp], #32 |
| |
|
| | ret |
| | endfunc |
| |
|
// ff_imdct_calc_neon: runs ff_imdct_half_neon into the middle of the output
// buffer, then fills the first and last quarters of the buffer by
// mirroring that result.
//
// Registers on entry, as this code uses them:
//   x0  context pointer. The 32-bit value at offset 28 is read as a shift
//       count (presumably the mdct_bits field, confirm against the struct).
//   x1  output buffer. With n = 1 << shift count, the loop below writes
//       bytes 0 .. n-1 and 3n .. 4n-1 of it.
//   x2  input buffer, passed through untouched to ff_imdct_half_neon.
| | function ff_imdct_calc_neon, export=1 |
// Prologue: reserve 32 bytes, x19 and x20 at sp, x30 at sp+16.
// AARCH64_SIGN_LINK_REGISTER is a macro defined outside this file.
| | stp x19, x20, [sp, #-32]! |
| | AARCH64_SIGN_LINK_REGISTER |
| | str x30, [sp, #16] |
// x19 = n and x20 = output base. Both are callee-saved, so they are still
// valid after the call. The half transform gets output + n bytes as its
// destination, x0 and x2 are forwarded as they are.
| | ldr w3, [x0, #28] |
| | mov x19, #1 |
| | mov x20, x1 |
| | lsl x19, x19, x3 |
| | add x1, x1, x19 |
| |
|
| | bl X(ff_imdct_half_neon) |
| |
|
// Pointer setup for the mirroring loop:
//   x0  = output + 4n - 8    store pointer, last quarter, moves down by 8
//   x1  = output + 2n        load pointer, moves up by 16
//   x2  = output + 2n - 16   load pointer, moves down by 16
//   x20 = output             store pointer, first quarter, moves up by 16
//   x3, x6 = the -16 and -8 strides
| | add x0, x20, x19, lsl #2 |
| | add x1, x20, x19, lsl #1 |
| | sub x0, x0, #8 |
| | sub x2, x1, #16 |
| | mov x3, #-16 |
| | mov x6, #-8 |
// Each pass handles four floats on either side of the midpoint:
//   - the four floats read downwards via x2 are fully reversed (rev64 swaps
//     the lanes inside each 64-bit half, ext by 8 bytes swaps the halves),
//     negated, and stored at x20
//   - the four floats read upwards via x1 are lane-swapped and stored as two
//     8-byte pieces at descending addresses via x0, which leaves them in
//     reversed order at the top end of the buffer
// The two prfum instructions are prefetch hints for the addresses 16 bytes
// below x0 and below x2.
// x19 counts n down in steps of 16 and the loop repeats while it is positive.
| | 1: |
| | ld1 {v0.4s}, [x2], x3 |
| | prfum pldl1keep, [x0, #-16] |
| | rev64 v0.4s, v0.4s |
| | ld1 {v2.2s,v3.2s}, [x1], #16 |
| | fneg v4.4s, v0.4s |
| | prfum pldl1keep, [x2, #-16] |
| | rev64 v2.2s, v2.2s |
| | rev64 v3.2s, v3.2s |
| | ext v4.16b, v4.16b, v4.16b, #8 |
| | st1 {v2.2s}, [x0], x6 |
| | st1 {v3.2s}, [x0], x6 |
| | st1 {v4.4s}, [x20], #16 |
| | subs x19, x19, #16 |
| | b.gt 1b |
| |
|
// Epilogue: reload x30, run the validation macro (defined elsewhere),
// restore x19 and x20 and release the 32-byte frame.
| | ldr x30, [sp, #16] |
| | AARCH64_VALIDATE_LINK_REGISTER |
| | ldp x19, x20, [sp], #32 |
| |
|
| | ret |
| | endfunc |
| |
|
| |
|
// ff_mdct_calc_neon: folds and pre-rotates the input into the output
// buffer, calls ff_fft_calc_neon on it, then post-rotates it in place.
//
// Registers on entry, as this code uses them:
//   x0  pointer to a context block. Read here: a 32-bit value at offset 28
//       (used as a shift count), a pointer at offset 32 (table of floats)
//       and a pointer at offset 8 (table of 32-bit words).
//   x1  output buffer, addressed in 8-byte (two-float) elements.
//   x2  input buffer of 32-bit floats.
// NOTE(review): offsets 28, 32 and 8 presumably are the mdct_bits, tcos and
// revtab fields of the FFT context. The struct is not visible in this file,
// confirm against its C definition.
//
// The numeric label 1 is defined four times in this function. Every 1b and
// 1f refers to the nearest definition in that direction.
| | function ff_mdct_calc_neon, export=1 |
// Prologue: reserve 32 bytes, x19 and x20 at sp, x30 at sp+16.
// AARCH64_SIGN_LINK_REGISTER is a macro defined outside this file.
| | stp x19, x20, [sp, #-32]! |
| | AARCH64_SIGN_LINK_REGISTER |
| | str x30, [sp, #16] |
| |
|
// n = 1 << (value at offset 28), kept in x14. Pointers, all in bytes:
//   x7 = in + n            read upwards
//   x9 = in + n - 16       read downwards
//   x2 = in + 3n           read upwards
//   x8 = in + 3n - 16      read downwards
//   x4 = table             read upwards
//   x5 = table + 2n - 16   read downwards
// x3 is biased by -4 because the loop advances it with a pre-indexed load
// that adds 4 before reading.
// x12 = -16 stride. x13 = n/2 is both the byte offset used for the second
// index load and the loop counter.
| | mov x12, #1 |
| | ldr w14, [x0, #28] |
| | ldr x4, [x0, #32] |
| | ldr x3, [x0, #8] |
| | lsl x14, x12, x14 |
| | add x7, x2, x14 |
| | sub x9, x7, #16 |
| | add x2, x7, x14, lsl #1 |
| | add x8, x9, x14, lsl #1 |
| | add x5, x4, x14, lsl #1 |
| | sub x5, x5, #16 |
| | sub x3, x3, #4 |
| | mov x12, #-16 |
| | lsr x13, x14, #1 |
| |
|
// Prime the software pipeline. Each ld2 reads four consecutive floats and
// de-interleaves them (first register: elements 0 and 2, second: 1 and 3).
// With rev() meaning the rev64 lane swap, the folded inputs are:
//   v0  = rev(v17) - v0
//   v2  = v2 + rev(v19)
//   v16 = v16 - rev(v1)
//   v18 = v18 + rev(v3)
// v20, v21 come from the table walking up, v30, v31 from the table walking
// down (presumably cosine and sine factors, confirm).
| | ld2 {v16.2s,v17.2s}, [x9], x12 |
| | ld2 {v18.2s,v19.2s}, [x8], x12 |
| | ld2 {v0.2s, v1.2s}, [x7], #16 |
| | rev64 v17.2s, v17.2s |
| | rev64 v19.2s, v19.2s |
| | ld2 {v2.2s, v3.2s}, [x2], #16 |
| | fsub v0.2s, v17.2s, v0.2s |
| | ld2 {v20.2s,v21.2s}, [x4], #16 |
| | rev64 v1.2s, v1.2s |
| | rev64 v3.2s, v3.2s |
| | ld2 {v30.2s,v31.2s}, [x5], x12 |
| | fadd v2.2s, v2.2s, v19.2s |
| | fsub v16.2s, v16.2s, v1.2s |
| | fadd v18.2s, v18.2s, v3.2s |
// Pre-rotation loop. Per pass and per lane:
//   v6  = v2*v20  - v0*v21        v7  = v2*v21  + v0*v20  (negated below)
//   v24 = v18*v31 - v16*v30       v25 = v16*v31 + v18*v30
// w10 is read from x3 + x13 before x3 is advanced, w6 from x3 after it has
// been advanced by 4. Because x3 grows by 4 and x13 shrinks by 8 per pass,
// the w6 reads walk forwards through the index table and the w10 reads
// walk backwards.
// The flags tested by b.eq come from the subs on x13. The subs on x14 has
// its flags overwritten, and x14 is not read again before it is reloaded
// after the FFT call.
| | 1: |
| | fmul v7.2s, v0.2s, v21.2s |
| | ldr w10, [x3, x13] |
| | fmul v6.2s, v2.2s, v20.2s |
| | ldr w6, [x3, #4]! |
| | fmul v4.2s, v2.2s, v21.2s |
| | fmul v5.2s, v0.2s, v20.2s |
| | fmul v24.2s, v16.2s, v30.2s |
| | fmul v25.2s, v18.2s, v31.2s |
| | fmul v22.2s, v16.2s, v31.2s |
| | fmul v23.2s, v18.2s, v30.2s |
| | subs x14, x14, #16 |
| | subs x13, x13, #8 |
| | fsub v6.2s, v6.2s, v7.2s |
| | fadd v7.2s, v4.2s, v5.2s |
| | fsub v24.2s, v25.2s, v24.2s |
| | fadd v25.2s, v22.2s, v23.2s |
| | b.eq 1f |
// Not the last pass. x12 is reused as a store address further down, so the
// -16 stride is re-established here on every pass before the next loads.
| | mov x12, #-16 |
| | ld2 {v16.2s,v17.2s}, [x9], x12 |
| | ld2 {v18.2s,v19.2s}, [x8], x12 |
| | fneg v7.2s, v7.2s |
| | ld2 {v0.2s, v1.2s}, [x7], #16 |
| | rev64 v17.2s, v17.2s |
| | rev64 v19.2s, v19.2s |
| | ld2 {v2.2s, v3.2s}, [x2], #16 |
| | fsub v0.2s, v17.2s, v0.2s |
| | ld2 {v20.2s,v21.2s}, [x4], #16 |
| | rev64 v1.2s, v1.2s |
| | rev64 v3.2s, v3.2s |
| | ld2 {v30.2s,v31.2s}, [x5], x12 |
| | fadd v2.2s, v2.2s, v19.2s |
| | fsub v16.2s, v16.2s, v1.2s |
| | fadd v18.2s, v18.2s, v3.2s |
// Scatter the results of this pass. Each index word holds two 16-bit
// element indices, scaled by 8 bytes from the output base x1:
//   (v6, v7)   lane 0 -> low half of w6,  lane 1 -> high half of w6
//   (v24, v25) lane 0 -> low half of w10, lane 1 -> high half of w10
| | ubfm x12, x6, #16, #31 |
| | ubfm x6, x6, #0, #15 |
| | add x12, x1, x12, lsl #3 |
| | add x6, x1, x6, lsl #3 |
| | st2 {v6.s,v7.s}[0], [x6] |
| | st2 {v6.s,v7.s}[1], [x12] |
| | ubfm x6, x10, #16, #31 |
| | ubfm x10, x10, #0, #15 |
| | add x6 , x1, x6, lsl #3 |
| | add x10, x1, x10, lsl #3 |
| | st2 {v24.s,v25.s}[0], [x10] |
| | st2 {v24.s,v25.s}[1], [x6] |
| | b 1b |
// Last pass: same negate and scatter as above, without further loads.
| | 1: |
| | fneg v7.2s, v7.2s |
| | ubfm x12, x6, #16, #31 |
| | ubfm x6, x6, #0, #15 |
| | add x12, x1, x12, lsl #3 |
| | add x6, x1, x6, lsl #3 |
| | st2 {v6.s,v7.s}[0], [x6] |
| | st2 {v6.s,v7.s}[1], [x12] |
| | ubfm x6, x10, #16, #31 |
| | ubfm x10, x10, #0, #15 |
| | add x6 , x1, x6, lsl #3 |
| | add x10, x1, x10, lsl #3 |
| | st2 {v24.s,v25.s}[0], [x10] |
| | st2 {v24.s,v25.s}[1], [x6] |
| |
|
// Keep the context and output pointers in callee-saved registers, then
// call the FFT with x0 and x1 still holding context and output buffer.
| | mov x19, x0 |
| | mov x20, x1 |
| | bl X(ff_fft_calc_neon) |
| |
|
// Post-rotation setup: reload shift count and table pointer through x19.
// x14 = n/8 is the loop counter and is decremented by 2 per pass.
| | mov x12, #1 |
| | ldr w14, [x19, #28] |
| | ldr x4, [x19, #32] |
| | lsl x12, x12, x14 |
| | lsr x14, x12, #3 |
| |
|
// Start at the midpoint and work outwards (n/8 * 8 = n bytes in):
//   x4  table pointer, read upwards from table + n bytes
//   x1  table pointer, read downwards from 16 bytes below that
//   x6  data pointer, read upwards from output + n bytes
//   x3  data pointer, read downwards from 16 bytes below that
| | add x4, x4, x14, lsl #3 |
| | add x6, x20, x14, lsl #3 |
| | sub x1, x4, #16 |
| | sub x3, x6, #16 |
| |
|
// x7 = downward stride. x8 and x0 are the store pointers that trail the
// upward (x6) and downward (x3) load pointers.
| | mov x7, #-16 |
| | mov x8, x6 |
| | mov x0, x3 |
| |
|
// Prime the pipeline for the post-rotation loop.
| | ld2 {v0.2s,v1.2s}, [x3], x7 |
| | ld2 {v20.2s,v21.2s}, [x6], #16 |
| | ld2 {v16.2s,v17.2s}, [x1], x7 |
// Post-rotation loop. Per pass and per lane:
//   v4 = -(v1*v17  + v0*v16)      v7 = v1*v16  - v0*v17
//   v6 = -(v21*v19 + v20*v18)     v5 = v21*v18 - v20*v19
// v5 and v7 are lane-swapped and cross over: v5 (computed from the upper
// data) is stored together with v4 in the lower block, v7 (computed from
// the lower data) together with v6 in the upper block.
// The loop ends only when x14 reaches exactly zero.
| | 1: |
| | subs x14, x14, #2 |
| | fmul v7.2s, v0.2s, v17.2s |
| | ld2 {v18.2s,v19.2s}, [x4], #16 |
| | fmul v4.2s, v1.2s, v17.2s |
| | fmul v6.2s, v21.2s, v19.2s |
| | fmul v5.2s, v20.2s, v19.2s |
| | fmul v24.2s, v0.2s, v16.2s |
| | fmul v25.2s, v20.2s, v18.2s |
| | fmul v22.2s, v21.2s, v18.2s |
| | fmul v23.2s, v1.2s, v16.2s |
| | fadd v4.2s, v4.2s, v24.2s |
| | fadd v6.2s, v6.2s, v25.2s |
| | fsub v5.2s, v22.2s, v5.2s |
| | fsub v7.2s, v23.2s, v7.2s |
| | fneg v4.2s, v4.2s |
| | fneg v6.2s, v6.2s |
| | b.eq 1f |
// Not the last pass: issue the next loads before storing, the loads run
// ahead of the store pointers so the data is read before it is overwritten.
| | ld2 {v0.2s, v1.2s}, [x3], x7 |
| | ld2 {v20.2s,v21.2s}, [x6], #16 |
| | ld2 {v16.2s,v17.2s}, [x1], x7 |
| | rev64 v5.2s, v5.2s |
| | rev64 v7.2s, v7.2s |
| | st2 {v4.2s,v5.2s}, [x0], x7 |
| | st2 {v6.2s,v7.2s}, [x8], #16 |
| | b 1b |
// Last pass: final swap and stores, no pointer update needed.
| | 1: |
| | rev64 v5.2s, v5.2s |
| | rev64 v7.2s, v7.2s |
| | st2 {v4.2s,v5.2s}, [x0] |
| | st2 {v6.2s,v7.2s}, [x8] |
| |
|
// Epilogue: reload x30, run the validation macro (defined elsewhere),
// restore x19 and x20 and release the 32-byte frame.
| | ldr x30, [sp, #16] |
| | AARCH64_VALIDATE_LINK_REGISTER |
| | ldp x19, x20, [sp], #32 |
| |
|
| | ret |
| | endfunc |
| |
|