// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| |
|
| | #include "go_asm.h" |
| | #include "textflag.h" |
| |
|
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Equality check whose length is not passed as an argument: it is loaded
// from the closure context instead.
//
// input:
//   R0:  pointer a
//   R1:  pointer b
//   R26: closure pointer; the compiler stores the size at offset 8
// at return: result in R0 (1 when the length is zero, otherwise whatever
// runtime·memequal returns)
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	MOVD	8(R26), R2	// compiler stores size at offset 8 in the closure
	CBZ	R2, eq		// nothing to compare: report equal without branching out
	// a, b and the size are now in R0, R1, R2, which is what memequal
	// takes as input. This is a plain branch (not a call), so memequal's
	// RET returns straight to our caller.
	B	runtime·memequal<ABIInternal>(SB)
eq:
	MOVD	$1, R0
	RET
| |
|
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and the size bytes at b are identical.
//
// input:
//   R0: pointer a
//   R1: pointer b
//   R2: data len
// at return: result in R0 (1 = equal, 0 = not equal)
//
// Registers written besides R0-R2: R3-R7, R9 and V0-V11.
//
// Strategy: compare 64 bytes per iteration with vector loads, then 16 bytes
// per iteration with register pairs, then finish the last 1..15 bytes with
// overlapping loads selected by the bits of the remaining length.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// short path to handle 0-byte case
	CBZ	R2, equal
	// short path to handle equal pointers
	CMP	R0, R1
	BEQ	equal
	CMP	$1, R2
	// handle 1-byte special case for better performance
	BEQ	one
	CMP	$16, R2
	// handle specially if length < 16
	BLO	tail
	BIC	$0x3f, R2, R3	// R3 = len rounded down to a multiple of 64
	CBZ	R3, chunk16	// 16 <= len < 64: no full 64-byte chunk
	// work with 64-byte chunks
	ADD	R3, R0, R6	// end of chunks
chunk64_loop:
	// Load 64 bytes from each side; both pointers post-increment by 64.
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
	// Each 64-bit lane becomes all-ones if equal, all-zeros if not.
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	// Fold the four results: a lane of V8 stays non-zero only if the
	// corresponding lane matched in all four register pairs.
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	// The flags set here are consumed by the BNE below; the VMOV and CBZ
	// instructions in between do not modify them.
	CMP	R0, R6
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	CBZ	R4, not_equal	// a zero lane means some 8-byte group differed
	CBZ	R5, not_equal
	BNE	chunk64_loop	// R0 has not reached the end of the 64-byte chunks
	AND	$0x3f, R2, R2	// bytes left after the 64-byte chunks
	CBZ	R2, equal
chunk16:
	// work with 16-byte chunks
	// Here 0 < R2 < 64.
	BIC	$0xf, R2, R3	// R3 = remaining len rounded down to a multiple of 16
	CBZ	R3, tail
	ADD	R3, R0, R6	// end of chunks
chunk16_loop:
	// Load 16 bytes from each side; both pointers post-increment by 16.
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R1), (R7, R9)
	EOR	R4, R7		// non-zero iff the first 8 bytes differ
	CBNZ	R7, not_equal
	EOR	R5, R9		// non-zero iff the second 8 bytes differ
	CBNZ	R9, not_equal
	CMP	R0, R6
	BNE	chunk16_loop
	AND	$0xf, R2, R2	// bytes left after the 16-byte chunks
	CBZ	R2, equal
tail:
	// special compare of tail with length < 16
	// Here 1 <= R2 <= 15. Bit 3 set means 8..15 bytes remain: compare the
	// first 8 and the last 8 bytes; the two windows may overlap, which is
	// harmless for an equality test.
	TBZ	$3, R2, lt_8
	MOVD	(R0), R4
	MOVD	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$8, R2, R6	// offset of the last 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
	PCALIGN	$16
lt_8:
	// Fewer than 8 bytes. Bit 2 set means 4..7 bytes remain: compare the
	// first 4 and the last 4 bytes (possibly overlapping).
	TBZ	$2, R2, lt_4
	MOVWU	(R0), R4
	MOVWU	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$4, R2, R6	// offset of the last 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
	PCALIGN	$16
lt_4:
	// Fewer than 4 bytes. Bit 1 set means 2..3 bytes remain: compare two
	// bytes and advance both pointers so a trailing odd byte is next.
	TBZ	$1, R2, lt_2
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R1), R5
	CMP	R4, R5
	BNE	not_equal
lt_2:
	// Bit 0 clear means no odd byte is left.
	TBZ	$0, R2, equal
one:
	// Compare a single byte. Also the direct entry point for len == 1.
	MOVBU	(R0), R4
	MOVBU	(R1), R5
	CMP	R4, R5
	BNE	not_equal
equal:
	MOVD	$1, R0
	RET
not_equal:
	MOVB	ZR, R0
	RET
| |
|