| // Copyright 2025 The Go Authors. All rights reserved. | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| package asmgen | |
| // shiftVU generates lshVU and rshVU, which do | |
| // z, c = x << s and z, c = x >> s, for 0 < s < _W. | |
| func shiftVU(a *Asm, name string) { | |
| // Because these routines can be called for z.Lsh(z, N) and z.Rsh(z, N), | |
| // the input and output slices may be aliased at different offsets. | |
| // For example (on 64-bit systems), during z.Lsh(z, 65), &z[0] == &x[1], | |
| // and during z.Rsh(z, 65), &z[1] == &x[0]. | |
| // For left shift, we must process the slices from len(z)-1 down to 0, | |
| // so that we don't overwrite a word before we need to read it. | |
| // For right shift, we must process the slices from 0 up to len(z)-1. | |
| // The different traversals at least make the two cases more consistent, | |
| // since we're always delaying the output by one word compared | |
| // to the input. | |
| f := a.Func("func " + name + "(z, x []Word, s uint) (c Word)") | |
| // Check for no input early, since we need to start by reading 1 word. | |
| n := f.Arg("z_len") | |
| a.JmpZero(n, "ret0") | |
| // Start loop by reading first input word. | |
| s := f.ArgHint("s", HintShiftCount) | |
| p := f.Pipe() | |
| if name == "lshVU" { | |
| p.SetBackward() | |
| } | |
| unroll := []int{1, 4} | |
| if a.Arch == Arch386 { | |
| unroll = []int{1} // too few registers for more | |
| p.SetUseIndexCounter() | |
| } | |
| p.LoadPtrs(n) | |
| a.Comment("shift first word into carry") | |
| prev := p.LoadN(1)[0][0] | |
| // Decide how to shift. On systems with a wide shift (x86), use that. | |
| // Otherwise, we need shift by s and negative (reverse) shift by 64-s or 32-s. | |
| shift := a.Lsh | |
| shiftWide := a.LshWide | |
| negShift := a.Rsh | |
| negShiftReg := a.RshReg | |
| if name == "rshVU" { | |
| shift = a.Rsh | |
| shiftWide = a.RshWide | |
| negShift = a.Lsh | |
| negShiftReg = a.LshReg | |
| } | |
| if a.Arch.HasShiftWide() { | |
| // Use wide shift to avoid needing negative shifts. | |
| // The invariant is that prev holds the previous word (not shifted at all), | |
| // to be used as input into the wide shift. | |
| // After the loop finishes, prev holds the final output word to be written. | |
| c := a.Reg() | |
| shiftWide(s, prev, a.Imm(0), c) | |
| f.StoreArg(c, "c") | |
| a.Free(c) | |
| a.Comment("shift remaining words") | |
| p.Start(n, unroll...) | |
| p.Loop(func(in [][]Reg, out [][]Reg) { | |
| // We reuse the input registers as output, delayed one cycle; prev is the first output. | |
| // After writing the outputs to memory, we can copy the final x value into prev | |
| // for the next iteration. | |
| old := prev | |
| for i, x := range in[0] { | |
| shiftWide(s, x, old, old) | |
| out[0][i] = old | |
| old = x | |
| } | |
| p.StoreN(out) | |
| a.Mov(old, prev) | |
| }) | |
| a.Comment("store final shifted bits") | |
| shift(s, prev, prev) | |
| } else { | |
| // Construct values from x << s and x >> (64-s). | |
| // After the first word has been processed, the invariant is that | |
| // prev holds x << s, to be used as the high bits of the next output word, | |
| // once we find the low bits after reading the next input word. | |
| // After the loop finishes, prev holds the final output word to be written. | |
| sNeg := a.Reg() | |
| a.Mov(a.Imm(a.Arch.WordBits), sNeg) | |
| a.Sub(s, sNeg, sNeg, SmashCarry) | |
| c := a.Reg() | |
| negShift(sNeg, prev, c) | |
| shift(s, prev, prev) | |
| f.StoreArg(c, "c") | |
| a.Free(c) | |
| a.Comment("shift remaining words") | |
| p.Start(n, unroll...) | |
| p.Loop(func(in, out [][]Reg) { | |
| if a.HasRegShift() { | |
| // ARM (32-bit) allows shifts in most arithmetic expressions, | |
| // including OR, letting us combine the negShift and a.Or. | |
| // The simplest way to manage the registers is to do StoreN for | |
| // one output at a time, and since we don't use multi-register | |
| // stores on ARM, that doesn't hurt us. | |
| out[0] = out[0][:1] | |
| for _, x := range in[0] { | |
| a.Or(negShiftReg(sNeg, x), prev, prev) | |
| out[0][0] = prev | |
| p.StoreN(out) | |
| shift(s, x, prev) | |
| } | |
| return | |
| } | |
| // We reuse the input registers as output, delayed one cycle; z0 is the first output. | |
| z0 := a.Reg() | |
| z := z0 | |
| for i, x := range in[0] { | |
| negShift(sNeg, x, z) | |
| a.Or(prev, z, z) | |
| shift(s, x, prev) | |
| out[0][i] = z | |
| z = x | |
| } | |
| p.StoreN(out) | |
| }) | |
| a.Comment("store final shifted bits") | |
| } | |
| p.StoreN([][]Reg{{prev}}) | |
| p.Done() | |
| a.Free(s) | |
| a.Ret() | |
| // Return 0, used from above. | |
| a.Label("ret0") | |
| f.StoreArg(a.Imm(0), "c") | |
| a.Ret() | |
| } | |