| |
| |
| |
|
|
| #include "textflag.h" |
|
|
| |
| |
| |
|
|
| |
// ROL(SHIFT, V, TMP) rotates every 32-bit lane of XMM register V left by
// SHIFT bits. TMP is clobbered: it receives V shifted left by SHIFT, V is
// shifted right by 32-SHIFT, and the two halves are merged back into V.
#define ROL(SHIFT, V, TMP) \
	MOVO V, TMP; \
	PSLLL $(SHIFT), TMP; \
	PSRLL $(32-(SHIFT)), V; \
	PXOR TMP, V
|
|
| |
// ROL16(V, TMP) rotates every 32-bit lane of V left by 16 bits.
// Without GOAMD64_v2 it expands to the shift-based ROL and clobbers TMP.
// With GOAMD64_v2 it is a single PSHUFB through the ·rol16<> byte-index
// table and TMP is not touched.
#ifndef GOAMD64_v2
#define ROL16(V, TMP) ROL(16, V, TMP)
#else
#define ROL16(V, TMP) PSHUFB ·rol16<>(SB), V
#endif
|
|
| |
// ROL8(V, TMP) rotates every 32-bit lane of V left by 8 bits.
// Without GOAMD64_v2 it expands to the shift-based ROL and clobbers TMP.
// With GOAMD64_v2 it is a single PSHUFB through the ·rol8<> byte-index
// table and TMP is not touched.
#ifndef GOAMD64_v2
#define ROL8(V, TMP) ROL(8, V, TMP)
#else
#define ROL8(V, TMP) PSHUFB ·rol8<>(SB), V
#endif
|
|
| |
// QR(A, B, C, D, TMP) applies one ChaCha quarter-round to the 32-bit lanes
// of A, B, C and D, using TMP as scratch (TMP is clobbered):
//
//	A += B; D ^= A; D <<<= 16
//	C += D; B ^= C; B <<<= 12
//	A += B; D ^= A; D <<<= 8
//	C += D; B ^= C; B <<<= 7
//
// The 12- and 7-bit rotates always use the shift-based ROL; only the 16- and
// 8-bit rotates have a PSHUFB variant (see ROL16 / ROL8).
#define QR(A, B, C, D, TMP) \
	PADDD B, A; PXOR A, D; ROL16(D, TMP); \
	PADDD D, C; PXOR C, B; ROL(12, B, TMP); \
	PADDD B, A; PXOR A, D; ROL8(D, TMP); \
	PADDD D, C; PXOR C, B; ROL(7, B, TMP)
|
|
| |
// REPLREG(SRC, XDST) copies general-purpose register SRC into XMM register
// XDST and then broadcasts its low 32 bits to all four 32-bit lanes of XDST.
#define REPLREG(SRC, XDST) MOVQ SRC, XDST; PSHUFD $0, XDST, XDST
|
|
| |
// REPL(K, XDST) broadcasts the 32-bit constant K to all four 32-bit lanes of
// XDST. The constant is staged through DX, so DX is clobbered.
#define REPL(K, XDST) MOVL $K, DX; REPLREG(DX, XDST)
|
|
| |
| |
// SEED(off, reg, XR) loads the off'th 32-bit word from the memory addressed
// by AX into general-purpose register reg, then broadcasts it to all four
// 32-bit lanes of XR. reg keeps the scalar word after the macro, so callers
// can reuse it later without reloading from memory.
//
// The final line deliberately has no trailing backslash: a dangling
// continuation would splice whatever line follows into the macro body.
#define SEED(off, reg, XR) \
	MOVL (4*off)(AX), reg; \
	REPLREG(reg, XR)
|
|
| |
|
|
| |
// block computes four ChaCha8 blocks in parallel. Register Xn holds state
// word n; lane i of every register belongs to block i.
//
// Inputs, as used by the code below (ABIInternal register arguments):
//	AX  address of eight 32-bit seed words (read through SEED(0..7))
//	BX  address of a 16 x 16-byte output area (slot n is (n*16)(BX))
//	CX  32-bit counter; lane i of state word 12 is set to CX+i
// NOTE(review): the matching Go declaration is not visible in this file;
// confirm the parameter types against it.
//
// Output: slot n of the BX area receives state word n for all four lanes.
// Clobbers: CX, DX, DI, SI, R8-R13, X0-X14, flags. X15 is zero on return.
// Stack: a 16-byte frame, used only to assemble the counter vector.
TEXT ·block<ABIInternal>(SB), NOSPLIT, $16
	// State words 0-3: the ChaCha constants ("expand 32-byte k" as four
	// little-endian words), identical in every lane.
	REPL(0x61707865, X0)
	REPL(0x3320646e, X1)
	REPL(0x79622d32, X2)
	REPL(0x6b206574, X3)

	// State word 12: lanes hold CX, CX+1, CX+2, CX+3. The vector is built
	// with scalar stores into the frame and then loaded in one piece.
	MOVL CX, 0(SP)
	INCL CX
	MOVL CX, 4(SP)
	INCL CX
	MOVL CX, 8(SP)
	INCL CX
	MOVL CX, 12(SP)
	MOVOU 0(SP), X12

	// State words 4-11: the eight seed words. Each word also stays in its
	// scalar register (DI, SI, R8-R13) so it can be added back after the
	// rounds without touching memory again.
	SEED(0, DI, X4)
	SEED(1, SI, X5)
	SEED(2, R8, X6)
	SEED(3, R9, X7)
	SEED(4, R10, X8)
	SEED(5, R11, X9)
	SEED(6, R12, X10)
	SEED(7, R13, X11)

	// State words 13-15 start at zero. All sixteen XMM registers are needed
	// for state, yet QR also needs a scratch register, so one state word
	// always lives in the output buffer instead. X15 is the first scratch
	// register; the initial zero of word 15 is therefore written to slot 15
	// rather than to X15 itself.
	MOVL $0, DX
	MOVQ DX, X13
	MOVQ DX, X14
	MOVOU X14, (15*16)(BX)

	// Four iterations of (column round + diagonal round) = 8 rounds.
	MOVL $4, DX
rounds:
	// Column round. Scratch starts as X15 (word 15 is in slot 15).
	QR(X0, X4, X8, X12, X15)
	MOVOU X4, (4*16)(BX)       // park word 4 in slot 4; X4 is done for this round
	QR(X1, X5, X9, X13, X15)
	MOVOU (15*16)(BX), X15     // word 15 back in X15; scratch is now X4
	QR(X2, X6, X10, X14, X4)
	QR(X3, X7, X11, X15, X4)

	// Diagonal round. Word 4 is still in slot 4; scratch is X4.
	QR(X0, X5, X10, X15, X4)
	MOVOU X15, (15*16)(BX)     // park word 15 in slot 15; X15 is done for this round
	QR(X1, X6, X11, X12, X4)
	MOVOU (4*16)(BX), X4       // word 4 back in X4; scratch is now X15
	QR(X2, X7, X8, X13, X15)
	QR(X3, X4, X9, X14, X15)

	DECL DX
	JNZ rounds

	// Write words 0-3 and 12-14 as they are. Word 15 is already in slot 15
	// from the last spill inside the loop. This must happen before the
	// registers are reused as temporaries below.
	MOVOU X0, (0*16)(BX)
	MOVOU X1, (1*16)(BX)
	MOVOU X2, (2*16)(BX)
	MOVOU X3, (3*16)(BX)
	MOVOU X12, (12*16)(BX)
	MOVOU X13, (13*16)(BX)
	MOVOU X14, (14*16)(BX)

	// Words 4-11: add the original seed word (still held in its scalar
	// register) to every lane, then write the result to its slot.
	REPLREG(DI, X0)
	PADDD X0, X4
	MOVOU X4, (4*16)(BX)

	REPLREG(SI, X1)
	PADDD X1, X5
	MOVOU X5, (5*16)(BX)

	REPLREG(R8, X2)
	PADDD X2, X6
	MOVOU X6, (6*16)(BX)

	REPLREG(R9, X3)
	PADDD X3, X7
	MOVOU X7, (7*16)(BX)

	REPLREG(R10, X12)
	PADDD X12, X8
	MOVOU X8, (8*16)(BX)

	REPLREG(R11, X13)
	PADDD X13, X9
	MOVOU X9, (9*16)(BX)

	REPLREG(R12, X14)
	PADDD X14, X10
	MOVOU X10, (10*16)(BX)

	REPLREG(R13, X15)
	PADDD X15, X11
	MOVOU X11, (11*16)(BX)

	// ABIInternal on amd64 reserves X15 as a zero register, so it has to be
	// cleared again before returning to Go code.
	MOVL $0, AX
	MOVQ AX, X15

	RET
|
|
| |
// ·rol16<> is the PSHUFB byte-index table referenced by ROL16, written as
// one entry per 32-bit lane. Within each lane, destination bytes 0,1,2,3
// are taken from source bytes 2,3,0,1, i.e. a 16-bit rotate of the lane.
GLOBL ·rol16<>(SB), NOPTR|RODATA, $16
DATA ·rol16<>+0(SB)/4, $0x01000302
DATA ·rol16<>+4(SB)/4, $0x05040706
DATA ·rol16<>+8(SB)/4, $0x09080B0A
DATA ·rol16<>+12(SB)/4, $0x0D0C0F0E
|
|
| |
// ·rol8<> is the PSHUFB byte-index table referenced by ROL8, written as one
// entry per 32-bit lane. Within each lane, destination bytes 0,1,2,3 are
// taken from source bytes 3,0,1,2, i.e. an 8-bit left rotate of the lane.
GLOBL ·rol8<>(SB), NOPTR|RODATA, $16
DATA ·rol8<>+0(SB)/4, $0x02010003
DATA ·rol8<>+4(SB)/4, $0x06050407
DATA ·rol8<>+8(SB)/4, $0x0A09080B
DATA ·rol8<>+12(SB)/4, $0x0E0D0C0F
|
|