Spaces:
Running
Running
| /* | |
| Stockfish, a UCI chess playing engine derived from Glaurung 2.1 | |
| Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) | |
| Stockfish is free software: you can redistribute it and/or modify | |
| it under the terms of the GNU General Public License as published by | |
| the Free Software Foundation, either version 3 of the License, or | |
| (at your option) any later version. | |
| Stockfish is distributed in the hope that it will be useful, | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| GNU General Public License for more details. | |
| You should have received a copy of the GNU General Public License | |
| along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| */ | |
| // Definition of layer SqrClippedReLU of NNUE evaluation function | |
| namespace Stockfish::Eval::NNUE::Layers { | |
| // Clipped ReLU | |
| template<IndexType InDims> | |
| class SqrClippedReLU { | |
| public: | |
| // Input/output type | |
| using InputType = std::int32_t; | |
| using OutputType = std::uint8_t; | |
| // Number of input/output dimensions | |
| static constexpr IndexType InputDimensions = InDims; | |
| static constexpr IndexType OutputDimensions = InputDimensions; | |
| static constexpr IndexType PaddedOutputDimensions = | |
| ceil_to_multiple<IndexType>(OutputDimensions, 32); | |
| using OutputBuffer = OutputType[PaddedOutputDimensions]; | |
| // Hash value embedded in the evaluation file | |
| static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { | |
| std::uint32_t hashValue = 0x538D24C7u; | |
| hashValue += prevHash; | |
| return hashValue; | |
| } | |
| // Read network parameters | |
| bool read_parameters(std::istream&) { return true; } | |
| // Write network parameters | |
| bool write_parameters(std::ostream&) const { return true; } | |
| std::size_t get_content_hash() const { | |
| std::size_t h = 0; | |
| hash_combine(h, get_hash_value(0)); | |
| return h; | |
| } | |
| // Forward propagation | |
| void propagate(const InputType* input, OutputType* output) const { | |
| constexpr IndexType NumChunks = InputDimensions / 16; | |
| static_assert(WeightScaleBits == 6); | |
| const auto in = reinterpret_cast<const __m128i*>(input); | |
| const auto out = reinterpret_cast<__m128i*>(output); | |
| for (IndexType i = 0; i < NumChunks; ++i) | |
| { | |
| __m128i words0 = | |
| _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])); | |
| __m128i words1 = | |
| _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])); | |
| // We shift by WeightScaleBits * 2 = 12 and divide by 128 | |
| // which is an additional shift-right of 7, meaning 19 in total. | |
| // MulHi strips the lower 16 bits so we need to shift out 3 more to match. | |
| words0 = _mm_srli_epi16(_mm_mulhi_epi16(words0, words0), 3); | |
| words1 = _mm_srli_epi16(_mm_mulhi_epi16(words1, words1), 3); | |
| _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); | |
| } | |
| constexpr IndexType Start = NumChunks * 16; | |
| constexpr IndexType Start = 0; | |
| for (IndexType i = Start; i < InputDimensions; ++i) | |
| { | |
| output[i] = static_cast<OutputType>( | |
| // Really should be /127 but we need to make it fast so we right-shift | |
| // by an extra 7 bits instead. Needs to be accounted for in the trainer. | |
| std::min(127ll, ((long long) (input[i]) * input[i]) >> (2 * WeightScaleBits + 7))); | |
| } | |
| } | |
| }; | |
| } // namespace Stockfish::Eval::NNUE::Layers | |