File size: 2,471 Bytes
e36aeda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime路memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	MOVD	8(R26), R2    // compiler stores size at offset 8 in the closure
	CBZ	R2, eq
	B	runtime路memequal<ABIInternal>(SB)
eq:
	MOVD	$1, R0
	RET

// input:
// R0: pointer a
// R1: pointer b
// R2: data len
// at return: result in R0
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime路memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// short path to handle 0-byte case
	CBZ     R2, equal
	// short path to handle equal pointers
	CMP     R0, R1
	BEQ     equal
	CMP	$1, R2
	// handle 1-byte special case for better performance
	BEQ	one
	CMP	$16, R2
	// handle specially if length < 16
	BLO	tail
	BIC	$0x3f, R2, R3
	CBZ	R3, chunk16
	// work with 64-byte chunks
	ADD	R3, R0, R6	// end of chunks
chunk64_loop:
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	CMP	R0, R6
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	CBZ	R4, not_equal
	CBZ	R5, not_equal
	BNE	chunk64_loop
	AND	$0x3f, R2, R2
	CBZ	R2, equal
chunk16:
	// work with 16-byte chunks
	BIC	$0xf, R2, R3
	CBZ	R3, tail
	ADD	R3, R0, R6	// end of chunks
chunk16_loop:
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R1), (R7, R9)
	EOR	R4, R7
	CBNZ	R7, not_equal
	EOR	R5, R9
	CBNZ	R9, not_equal
	CMP	R0, R6
	BNE	chunk16_loop
	AND	$0xf, R2, R2
	CBZ	R2, equal
tail:
	// special compare of tail with length < 16
	TBZ	$3, R2, lt_8
	MOVD	(R0), R4
	MOVD	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$8, R2, R6	// offset of the last 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
	PCALIGN	$16
lt_8:
	TBZ	$2, R2, lt_4
	MOVWU	(R0), R4
	MOVWU	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$4, R2, R6	// offset of the last 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
	PCALIGN	$16
lt_4:
	TBZ	$1, R2, lt_2
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R1), R5
	CMP	R4, R5
	BNE	not_equal
lt_2:
	TBZ	$0, R2, equal
one:
	MOVBU	(R0), R4
	MOVBU	(R1), R5
	CMP	R4, R5
	BNE	not_equal
equal:
	MOVD	$1, R0
	RET
not_equal:
	MOVB	ZR, R0
	RET