diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 4eb64ffef42045c2050fbbd616ab6ef185f1a6e1..0000000000000000000000000000000000000000 --- a/src/Makefile +++ /dev/null @@ -1,1184 +0,0 @@ -# Stockfish, a UCI chess playing engine derived from Glaurung 2.1 -# Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) -# -# Stockfish is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Stockfish is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -### ========================================================================== -### Section 1. General Configuration -### ========================================================================== - -### Establish the operating system name -KERNEL := $(shell uname -s) -ifeq ($(KERNEL),Linux) - OS := $(shell uname -o) -endif - -### Command prefix to run the built executable (e.g. wine, sde, qemu) -### Backward compatible alias: WINE_PATH (deprecated) -ifneq ($(strip $(WINE_PATH)),) -ifeq ($(strip $(RUN_PREFIX)),) -RUN_PREFIX := $(WINE_PATH) -endif -ifeq ($(MAKELEVEL),0) -ifneq ($(strip $(RUN_PREFIX)),$(strip $(WINE_PATH))) -$(warning *** Both RUN_PREFIX and WINE_PATH are set; ignoring WINE_PATH. ***) -else -$(warning *** WINE_PATH is deprecated; use RUN_PREFIX instead. 
***) -endif -endif -endif - -### Target Windows OS -ifeq ($(OS),Windows_NT) - ifneq ($(COMP),ndk) - target_windows = yes - endif -else ifeq ($(COMP),mingw) - target_windows = yes - ifeq ($(RUN_PREFIX),) - RUN_PREFIX := $(shell which wine) - endif -endif - -### Executable name -ifeq ($(target_windows),yes) - EXE = stockfish.exe -else - EXE = stockfish -endif - -### Installation dir definitions -PREFIX = /usr/local -BINDIR = $(PREFIX)/bin - -### Built-in benchmark for pgo-builds -PGOBENCH = $(RUN_PREFIX) ./$(EXE) bench - -### Source and object files -SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \ - misc.cpp movegen.cpp movepick.cpp position.cpp \ - search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ - nnue/nnue_accumulator.cpp nnue/nnue_misc.cpp nnue/network.cpp \ - nnue/features/half_ka_v2_hm.cpp nnue/features/full_threats.cpp \ - engine.cpp score.cpp memory.cpp - -HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h history.h \ - nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/features/full_threats.h \ - nnue/layers/affine_transform.h nnue/layers/affine_transform_sparse_input.h \ - nnue/layers/clipped_relu.h nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h \ - nnue/nnue_architecture.h nnue/nnue_common.h nnue/nnue_feature_transformer.h nnue/simd.h \ - position.h search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \ - tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h memory.h shm.h shm_linux.h - -OBJS = $(notdir $(SRCS:.cpp=.o)) - -VPATH = syzygy:nnue:nnue/features - -### ========================================================================== -### Section 2. 
High-level Configuration -### ========================================================================== -# -# flag --- Comp switch --- Description -# ---------------------------------------------------------------------------- -# -# debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode -# sanitize = none/ ... (-fsanitize ) -# --- ( undefined ) --- enable undefined behavior checks -# --- ( thread ) --- enable threading error checks -# --- ( address ) --- enable memory access checks -# --- ...etc... --- see compiler documentation for supported sanitizers -# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations -# arch = (name) --- (-arch) --- Target architecture -# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system -# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction -# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction -# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction -# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions -# mmx = yes/no --- -mmmx --- Use Intel MMX instructions -# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2 -# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 -# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 -# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 -# avxvnni = yes/no --- -mavxvnni --- Use Intel Vector Neural Network Instructions AVX -# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 -# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 -# avx512icl = yes/no --- ... multiple ... 
--- Use All AVX-512 features available on both Intel Ice Lake and AMD Zen 4 -# altivec = yes/no --- -maltivec --- Use PowerPC Altivec SIMD extension -# vsx = yes/no --- -mvsx --- Use POWER VSX SIMD extension -# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture -# dotprod = yes/no --- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions -# lsx = yes/no --- -mlsx --- Use Loongson SIMD eXtension -# lasx = yes/no --- -mlasx --- use Loongson Advanced SIMD eXtension -# -# Note that Makefile is space sensitive, so when adding new architectures -# or modifying existing flags, you have to make sure there are no extra spaces -# at the end of the line for flag values. -# -# Example of use for these flags: -# make build ARCH=x86-64-avx512 debug=yes sanitize="address undefined" - - -### 2.1. General and architecture defaults - -ifeq ($(ARCH),) - ARCH = native -endif - -ifeq ($(ARCH), native) - override ARCH := $(shell $(SHELL) ../scripts/get_native_properties.sh | cut -d " " -f 1) -endif - -# explicitly check for the list of supported architectures (as listed with make help), -# the user can override with `make ARCH=x86-64-avx512icl SUPPORTED_ARCH=true` -ifeq ($(ARCH), $(filter $(ARCH), \ - x86-64-avx512icl x86-64-vnni512 x86-64-avx512 x86-64-avxvnni \ - x86-64-bmi2 x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ - x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-64-altivec ppc-64-vsx ppc-32 e2k \ - armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64 \ - loongarch64 loongarch64-lsx loongarch64-lasx)) - SUPPORTED_ARCH=true -else - SUPPORTED_ARCH=false -endif - -optimize = yes -debug = no -sanitize = none -bits = 64 -prefetch = no -popcnt = no -pext = no -sse = no -mmx = no -sse2 = no -ssse3 = no -sse41 = no -avx2 = no -avxvnni = no -avx512 = no -vnni512 = no -avx512icl = no -altivec = no -vsx = no -neon = no -dotprod = no -arm_version = 0 -lsx = no -lasx = no -STRIP = strip - 
-ifneq ($(shell which clang-format-20 2> /dev/null),) - CLANG-FORMAT = clang-format-20 -else - CLANG-FORMAT = clang-format -endif - -### 2.2 Architecture specific - -ifeq ($(findstring x86,$(ARCH)),x86) - -# x86-32/64 - -ifeq ($(findstring x86-32,$(ARCH)),x86-32) - arch = i386 - bits = 32 - sse = no - mmx = yes -else - arch = x86_64 - sse = yes - sse2 = yes -endif - -ifeq ($(findstring -sse,$(ARCH)),-sse) - sse = yes -endif - -ifeq ($(findstring -popcnt,$(ARCH)),-popcnt) - popcnt = yes -endif - -ifeq ($(findstring -mmx,$(ARCH)),-mmx) - mmx = yes -endif - -ifeq ($(findstring -sse2,$(ARCH)),-sse2) - sse = yes - sse2 = yes -endif - -ifeq ($(findstring -ssse3,$(ARCH)),-ssse3) - sse = yes - sse2 = yes - ssse3 = yes -endif - -ifeq ($(findstring -sse41,$(ARCH)),-sse41) - sse = yes - sse2 = yes - ssse3 = yes - sse41 = yes -endif - -ifeq ($(findstring -modern,$(ARCH)),-modern) - $(warning *** ARCH=$(ARCH) is deprecated, defaulting to ARCH=x86-64-sse41-popcnt. Execute `make help` for a list of available architectures. 
***) - $(shell sleep 5) - popcnt = yes - sse = yes - sse2 = yes - ssse3 = yes - sse41 = yes -endif - -ifeq ($(findstring -avx2,$(ARCH)),-avx2) - popcnt = yes - sse = yes - sse2 = yes - ssse3 = yes - sse41 = yes - avx2 = yes -endif - -ifeq ($(findstring -avxvnni,$(ARCH)),-avxvnni) - popcnt = yes - sse = yes - sse2 = yes - ssse3 = yes - sse41 = yes - avx2 = yes - avxvnni = yes - pext = yes -endif - -ifeq ($(findstring -bmi2,$(ARCH)),-bmi2) - popcnt = yes - sse = yes - sse2 = yes - ssse3 = yes - sse41 = yes - avx2 = yes - pext = yes -endif - -ifeq ($(findstring -avx512,$(ARCH)),-avx512) - popcnt = yes - sse = yes - sse2 = yes - ssse3 = yes - sse41 = yes - avx2 = yes - pext = yes - avx512 = yes -endif - -ifeq ($(findstring -vnni512,$(ARCH)),-vnni512) - popcnt = yes - sse = yes - sse2 = yes - ssse3 = yes - sse41 = yes - avx2 = yes - pext = yes - avx512 = yes - vnni512 = yes -endif - -ifeq ($(findstring -avx512icl,$(ARCH)),-avx512icl) - popcnt = yes - sse = yes - sse2 = yes - ssse3 = yes - sse41 = yes - avx2 = yes - pext = yes - avx512 = yes - vnni512 = yes - avx512icl = yes -endif - -ifeq ($(sse),yes) - prefetch = yes -endif - -# 64-bit pext is not available on x86-32 -ifeq ($(bits),32) - pext = no -endif - -else - -# all other architectures - -ifeq ($(ARCH),general-32) - arch = any - bits = 32 -endif - -ifeq ($(ARCH),general-64) - arch = any -endif - -ifeq ($(ARCH),armv7) - arch = armv7 - prefetch = yes - bits = 32 - arm_version = 7 -endif - -ifeq ($(ARCH),armv7-neon) - arch = armv7 - prefetch = yes - popcnt = yes - neon = yes - bits = 32 - arm_version = 7 -endif - -ifeq ($(ARCH),armv8) - arch = armv8 - prefetch = yes - popcnt = yes - neon = yes - arm_version = 8 -endif - -ifeq ($(ARCH),armv8-dotprod) - arch = armv8 - prefetch = yes - popcnt = yes - neon = yes - dotprod = yes - arm_version = 8 -endif - -ifeq ($(ARCH),apple-silicon) - arch = arm64 - prefetch = yes - popcnt = yes - neon = yes - dotprod = yes - arm_version = 8 -endif - -ifeq ($(ARCH),ppc-32) - arch = ppc 
- bits = 32 -endif - -ifeq ($(ARCH),ppc-64) - arch = ppc64 - popcnt = yes - prefetch = yes -endif - -ifeq ($(ARCH),ppc-64-altivec) - arch = ppc64 - popcnt = yes - prefetch = yes - altivec = yes -endif - -ifeq ($(ARCH),ppc-64-vsx) - arch = ppc64 - popcnt = yes - prefetch = yes - vsx = yes -endif - -ifeq ($(findstring e2k,$(ARCH)),e2k) - arch = e2k - mmx = yes - bits = 64 - sse = yes - sse2 = yes - ssse3 = yes - sse41 = yes - popcnt = yes -endif - -ifeq ($(ARCH),riscv64) - arch = riscv64 -endif - -ifeq ($(findstring loongarch64,$(ARCH)),loongarch64) - arch = loongarch64 - prefetch = yes - -ifeq ($(findstring -lasx,$(ARCH)),-lasx) - lsx = yes - lasx = yes -endif - -ifeq ($(findstring -lsx,$(ARCH)),-lsx) - lsx = yes -endif - -endif -endif - - -### ========================================================================== -### Section 3. Low-level Configuration -### ========================================================================== - -### 3.1 Selecting compiler (default = gcc) -ifeq ($(MAKELEVEL),0) - export ENV_CXXFLAGS := $(CXXFLAGS) - export ENV_DEPENDFLAGS := $(DEPENDFLAGS) - export ENV_LDFLAGS := $(LDFLAGS) -endif - -CXXFLAGS = $(ENV_CXXFLAGS) -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) -DEPENDFLAGS = $(ENV_DEPENDFLAGS) -std=c++17 -LDFLAGS = $(ENV_LDFLAGS) $(EXTRALDFLAGS) - -ifeq ($(COMP),) - COMP=gcc -endif - -ifeq ($(COMP),gcc) - comp=gcc - CXX=g++ - CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations - - ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64)) - ifeq ($(OS),Android) - CXXFLAGS += -m$(bits) - LDFLAGS += -m$(bits) - endif - ifeq ($(ARCH),riscv64) - CXXFLAGS += -latomic - endif - else ifeq ($(arch),loongarch64) - CXXFLAGS += -latomic - else - CXXFLAGS += -m$(bits) - LDFLAGS += -m$(bits) - endif - - ifeq ($(arch),$(filter $(arch),armv7)) - LDFLAGS += -latomic - endif - - ifneq ($(KERNEL),Darwin) - LDFLAGS += -Wl,--no-as-needed - endif -endif - -ifeq ($(target_windows),yes) - LDFLAGS += -static -endif - -ifeq 
($(COMP),mingw) - comp=mingw - - ifeq ($(bits),64) - ifeq ($(shell which x86_64-w64-mingw32-c++-posix 2> /dev/null),) - CXX=x86_64-w64-mingw32-c++ - else - CXX=x86_64-w64-mingw32-c++-posix - endif - else - ifeq ($(shell which i686-w64-mingw32-c++-posix 2> /dev/null),) - CXX=i686-w64-mingw32-c++ - else - CXX=i686-w64-mingw32-c++-posix - endif - endif - CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations -endif - -ifeq ($(COMP),icx) - comp=icx - CXX=icpx - CXXFLAGS += --intel -pedantic -Wextra -Wshadow -Wmissing-prototypes \ - -Wconditional-uninitialized -Wabi -Wdeprecated -endif - -ifeq ($(COMP),clang) - comp=clang - CXX=clang++ - ifeq ($(target_windows),yes) - CXX=x86_64-w64-mingw32-clang++ - endif - - CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-prototypes \ - -Wconditional-uninitialized -flax-vector-conversions=none - - ifeq ($(filter $(KERNEL),Darwin OpenBSD FreeBSD),) - ifeq ($(target_windows),) - ifneq ($(RTLIB),compiler-rt) - LDFLAGS += -latomic - endif - endif - endif - - ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64)) - ifeq ($(OS),Android) - CXXFLAGS += -m$(bits) - LDFLAGS += -m$(bits) - endif - ifeq ($(ARCH),riscv64) - CXXFLAGS += -latomic - endif - else ifeq ($(arch),loongarch64) - CXXFLAGS += -latomic - else - CXXFLAGS += -m$(bits) - LDFLAGS += -m$(bits) - endif -endif - -ifeq ($(KERNEL),Darwin) - CXXFLAGS += -mmacosx-version-min=10.15 - LDFLAGS += -mmacosx-version-min=10.15 - ifneq ($(arch),any) - CXXFLAGS += -arch $(arch) - LDFLAGS += -arch $(arch) - endif - XCRUN = xcrun -endif - -# To cross-compile for Android, use NDK version r27c or later. 
-ifeq ($(COMP),ndk) - CXXFLAGS += -stdlib=libc++ - comp=clang - ifeq ($(arch),armv7) - CXX=armv7a-linux-androideabi29-clang++ - CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon - ifneq ($(shell which arm-linux-androideabi-strip 2>/dev/null),) - STRIP=arm-linux-androideabi-strip - else - STRIP=llvm-strip - endif - endif - ifeq ($(arch),armv8) - CXX=aarch64-linux-android29-clang++ - ifneq ($(shell which aarch64-linux-android-strip 2>/dev/null),) - STRIP=aarch64-linux-android-strip - else - STRIP=llvm-strip - endif - endif - ifeq ($(arch),x86_64) - CXX=x86_64-linux-android29-clang++ - ifneq ($(shell which x86_64-linux-android-strip 2>/dev/null),) - STRIP=x86_64-linux-android-strip - else - STRIP=llvm-strip - endif - endif - LDFLAGS += -static-libstdc++ -endif - -### Allow overwriting CXX from command line -ifdef COMPCXX - CXX=$(COMPCXX) -endif - -# llvm-profdata must be version compatible with the specified CXX (be it clang, or the gcc alias) -# make -j profile-build CXX=clang++-20 COMP=clang -# Locate the version in the same directory as the compiler used, -# with fallback to a generic one if it can't be located - LLVM_PROFDATA := $(dir $(realpath $(shell which $(CXX) 2> /dev/null)))llvm-profdata -# for icx -ifeq ($(wildcard $(LLVM_PROFDATA)),) - LLVM_PROFDATA := $(dir $(realpath $(shell which $(CXX) 2> /dev/null)))/compiler/llvm-profdata -endif -ifeq ($(wildcard $(LLVM_PROFDATA)),) - LLVM_PROFDATA := llvm-profdata -endif - -ifeq ($(comp),icx) - profile_make = icx-profile-make - profile_use = icx-profile-use -else ifeq ($(comp),clang) - profile_make = clang-profile-make - profile_use = clang-profile-use -else - profile_make = gcc-profile-make - profile_use = gcc-profile-use - ifeq ($(KERNEL),Darwin) - EXTRAPROFILEFLAGS = -fvisibility=hidden - endif -endif - -### Sometimes gcc is really clang -ifeq ($(COMP),gcc) - gccversion := $(shell $(CXX) --version 2>/dev/null) - gccisclang := $(findstring clang,$(gccversion)) - ifneq ($(gccisclang),) - 
profile_make = clang-profile-make - profile_use = clang-profile-use - else - CXXFLAGS += -Wstack-usage=128000 - endif -endif - -### On mingw use Windows threads, otherwise POSIX -ifneq ($(comp),mingw) - CXXFLAGS += -DUSE_PTHREADS - # On Android Bionic's C library comes with its own pthread implementation bundled in - ifneq ($(OS),Android) - # Haiku has pthreads in its libroot, so only link it in on other platforms - ifneq ($(KERNEL),Haiku) - ifneq ($(COMP),ndk) - LDFLAGS += -lpthread - - add_lrt = yes - ifeq ($(target_windows),yes) - add_lrt = no - endif - - ifeq ($(KERNEL),Darwin) - add_lrt = no - endif - - ifeq ($(add_lrt),yes) - LDFLAGS += -lrt - endif - endif - endif - endif -endif - -### 3.2.1 Debugging -ifeq ($(debug),no) - CXXFLAGS += -DNDEBUG -else - CXXFLAGS += -g - CXXFLAGS += -D_GLIBCXX_ASSERTIONS -D_GLIBCXX_DEBUG -endif - -### 3.2.2 Debugging with undefined behavior sanitizers -ifneq ($(sanitize),none) - CXXFLAGS += -g3 $(addprefix -fsanitize=,$(sanitize)) - LDFLAGS += $(addprefix -fsanitize=,$(sanitize)) -endif - -### 3.3 Optimization -ifeq ($(optimize),yes) - - CXXFLAGS += -O3 -funroll-loops - - ifeq ($(comp),gcc) - ifeq ($(OS), Android) - CXXFLAGS += -fno-gcse -mthumb -march=armv7-a -mfloat-abi=softfp - endif - endif - - ifeq ($(KERNEL),Darwin) - ifeq ($(comp),$(filter $(comp),clang icx)) - CXXFLAGS += -mdynamic-no-pic - endif - - ifeq ($(comp),gcc) - ifneq ($(arch),arm64) - CXXFLAGS += -mdynamic-no-pic - endif - endif - endif - - ifeq ($(comp),clang) - clangmajorversion := $(shell $(CXX) -dumpversion 2>/dev/null | cut -f1 -d.) 
- ifeq ($(shell expr $(clangmajorversion) \< 16),1) - CXXFLAGS += -fexperimental-new-pass-manager - endif - endif -endif - -### 3.4 Bits -ifeq ($(bits),64) - CXXFLAGS += -DIS_64BIT -endif - -### 3.5 prefetch and popcount -ifeq ($(prefetch),yes) - ifeq ($(sse),yes) - CXXFLAGS += -msse - endif -else - CXXFLAGS += -DNO_PREFETCH -endif - -ifeq ($(popcnt),yes) - ifeq ($(arch),$(filter $(arch),ppc64 ppc64-altivec ppc64-vsx armv7 armv8 arm64)) - CXXFLAGS += -DUSE_POPCNT - else - CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT - endif -endif - -### 3.6 SIMD architectures -ifeq ($(avx2),yes) - CXXFLAGS += -DUSE_AVX2 - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mavx2 -mbmi - endif -endif - -ifeq ($(avxvnni),yes) - CXXFLAGS += -DUSE_VNNI -DUSE_AVXVNNI - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mavxvnni - endif -endif - -ifeq ($(avx512),yes) - CXXFLAGS += -DUSE_AVX512 - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mavx512f -mavx512bw -mavx512dq -mavx512vl - endif -endif - -ifeq ($(vnni512),yes) - CXXFLAGS += -DUSE_VNNI - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl - endif -endif - -ifeq ($(avx512icl),yes) - CXXFLAGS += -DUSE_AVX512 -DUSE_VNNI -DUSE_AVX512ICL - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx512vpopcntdq -mavx512bitalg -mavx512vnni -mvpclmulqdq -mgfni -mvaes - endif -endif - -ifeq ($(sse41),yes) - CXXFLAGS += -DUSE_SSE41 - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -msse4.1 - endif -endif - -ifeq ($(ssse3),yes) - CXXFLAGS += -DUSE_SSSE3 - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mssse3 - endif -endif - -ifeq ($(sse2),yes) - CXXFLAGS += -DUSE_SSE2 - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -msse2 - endif -endif - -ifeq 
($(mmx),yes) - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mmmx - endif -endif - -ifeq ($(altivec),yes) - CXXFLAGS += -maltivec - ifeq ($(COMP),gcc) - CXXFLAGS += -mabi=altivec - endif -endif - -ifeq ($(vsx),yes) - CXXFLAGS += -mvsx - ifeq ($(COMP),gcc) - CXXFLAGS += -DNO_WARN_X86_INTRINSICS -DUSE_SSE2 - endif -endif - -ifeq ($(neon),yes) - CXXFLAGS += -DUSE_NEON=$(arm_version) - ifeq ($(KERNEL),Linux) - ifneq ($(COMP),ndk) - ifneq ($(arch),armv8) - CXXFLAGS += -mfpu=neon - endif - endif - endif -endif - -ifeq ($(dotprod),yes) - CXXFLAGS += -march=armv8.2-a+dotprod -DUSE_NEON_DOTPROD -endif - -ifeq ($(lasx),yes) - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mlasx - endif -endif - -ifeq ($(lsx),yes) - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mlsx - endif -endif - -### 3.7 pext -ifeq ($(pext),yes) - CXXFLAGS += -DUSE_PEXT - ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) - CXXFLAGS += -mbmi2 - endif -endif - -### 3.8.1 Try to include git commit sha for versioning -GIT_SHA := $(shell git rev-parse HEAD 2>/dev/null | cut -c 1-8) -ifneq ($(GIT_SHA), ) - CXXFLAGS += -DGIT_SHA=$(GIT_SHA) -endif - -### 3.8.2 Try to include git commit date for versioning -GIT_DATE := $(shell git show -s --date=format:'%Y%m%d' --format=%cd HEAD 2>/dev/null) -ifneq ($(GIT_DATE), ) - CXXFLAGS += -DGIT_DATE=$(GIT_DATE) -endif - -### 3.8.3 Try to include architecture -ifneq ($(ARCH), ) - CXXFLAGS += -DARCH=$(ARCH) -endif - -### 3.9 Link Time Optimization -### This is a mix of compile and link time options because the lto link phase -### needs access to the optimization flags. 
-ifeq ($(optimize),yes) -ifeq ($(debug),no) - ifneq ($(KERNEL),Darwin) - LLD_BIN := $(shell command -v ld.lld 2>/dev/null) - ifeq ($(LLD_BIN),) - LLD_BIN := $(shell command -v lld 2>/dev/null) - endif - ifneq ($(LLD_BIN),) - ifeq ($(comp),clang) - LDFLAGS += -fuse-ld=lld - else ifeq ($(comp),gcc) - ifneq ($(gccisclang),) - LDFLAGS += -fuse-ld=lld - endif - endif - endif - endif - - ifeq ($(comp),$(filter $(comp),clang icx)) - CXXFLAGS += -flto=full - ifeq ($(comp),icx) - CXXFLAGS += -fwhole-program-vtables - endif - LDFLAGS += $(CXXFLAGS) - -# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be -# GCC on some systems. - else ifeq ($(comp),gcc) - ifeq ($(gccisclang),) - CXXFLAGS += -flto -flto-partition=one - LDFLAGS += $(CXXFLAGS) -flto=jobserver - else - CXXFLAGS += -flto=full - LDFLAGS += $(CXXFLAGS) - endif - -# To use LTO and static linking on Windows, -# the tool chain requires gcc version 10.1 or later. - else ifeq ($(comp),mingw) - CXXFLAGS += -flto -flto-partition=one - LDFLAGS += $(CXXFLAGS) -save-temps - endif -endif -endif - -### 3.10 Android 5 can only run position independent executables. Note that this -### breaks Android 4.0 and earlier. -ifeq ($(OS), Android) - CXXFLAGS += -fPIE - LDFLAGS += -fPIE -pie -endif - -### 3.11 Inline settings -ifeq ($(optimize), yes) - ifeq ($(comp), clang) - CXXFLAGS += -Xclang -mllvm -Xclang -inline-threshold=500 - endif -endif - -### ========================================================================== -### Section 4. 
Public Targets -### ========================================================================== - -help: - @echo "" && \ - echo "To compile stockfish, type: " && \ - echo "" && \ - echo "make -j target [ARCH=arch] [COMP=compiler] [COMPCXX=cxx]" && \ - echo "" && \ - echo "Supported targets:" && \ - echo "" && \ - echo "help > Display architecture details" && \ - echo "profile-build > standard build with profile-guided optimization" && \ - echo "build > skip profile-guided optimization" && \ - echo "net > Download the default nnue nets" && \ - echo "strip > Strip executable" && \ - echo "install > Install executable" && \ - echo "clean > Clean up" && \ - echo "" && \ - echo "Supported archs:" && \ - echo "" && \ - echo "native > select the best architecture for the host processor (default)" && \ - echo "x86-64-avx512icl > x86 64-bit with minimum avx512 support of Intel Ice Lake or AMD Zen 4" && \ - echo "x86-64-vnni512 > x86 64-bit with vnni 512bit support" && \ - echo "x86-64-avx512 > x86 64-bit with avx512 support" && \ - echo "x86-64-avxvnni > x86 64-bit with vnni 256bit support" && \ - echo "x86-64-bmi2 > x86 64-bit with bmi2 support" && \ - echo "x86-64-avx2 > x86 64-bit with avx2 support" && \ - echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" && \ - echo "x86-64-modern > deprecated, currently x86-64-sse41-popcnt" && \ - echo "x86-64-ssse3 > x86 64-bit with ssse3 support" && \ - echo "x86-64-sse3-popcnt > x86 64-bit with sse3 compile and popcnt support" && \ - echo "x86-64 > x86 64-bit generic (with sse2 support)" && \ - echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" && \ - echo "x86-32-sse2 > x86 32-bit with sse2 support" && \ - echo "x86-32 > x86 32-bit generic (with mmx compile support)" && \ - echo "ppc-64 > PPC 64-bit" && \ - echo "ppc-64-altivec > PPC 64-bit with altivec support" && \ - echo "ppc-64-vsx > PPC 64-bit with vsx support" && \ - echo "ppc-32 > PPC 32-bit" && \ - echo "armv7 > ARMv7 32-bit" && \ - 
echo "armv7-neon > ARMv7 32-bit with popcnt and neon" && \ - echo "armv8 > ARMv8 64-bit with popcnt and neon" && \ - echo "armv8-dotprod > ARMv8 64-bit with popcnt, neon and dot product support" && \ - echo "e2k > Elbrus 2000" && \ - echo "apple-silicon > Apple silicon ARM64" && \ - echo "general-64 > unspecified 64-bit" && \ - echo "general-32 > unspecified 32-bit" && \ - echo "riscv64 > RISC-V 64-bit" && \ - echo "loongarch64 > LoongArch 64-bit" && \ - echo "loongarch64-lsx > LoongArch 64-bit with SIMD eXtension" && \ - echo "loongarch64-lasx > LoongArch 64-bit with Advanced SIMD eXtension" && \ - echo "" && \ - echo "Supported compilers:" && \ - echo "" && \ - echo "gcc > GNU compiler (default)" && \ - echo "mingw > GNU compiler with MinGW under Windows" && \ - echo "clang > LLVM Clang compiler" && \ - echo "icx > Intel oneAPI DPC++/C++ Compiler" && \ - echo "ndk > Google NDK to cross-compile for Android" && \ - echo "" && \ - echo "Simple examples. If you don't know what to do, you likely want to run one of: " && \ - echo "" && \ - echo "make -j profile-build ARCH=x86-64-avx2 # typically a fast compile for common systems " && \ - echo "make -j profile-build ARCH=x86-64-sse41-popcnt # A more portable compile for 64-bit systems " && \ - echo "make -j profile-build ARCH=x86-64 # A portable compile for 64-bit systems " && \ - echo "" && \ - echo "Advanced examples, for experienced users: " && \ - echo "" && \ - echo "make -j profile-build ARCH=x86-64-avxvnni" && \ - echo "make -j profile-build ARCH=x86-64-avxvnni COMP=gcc COMPCXX=g++-12.0" && \ - echo "make -j build ARCH=x86-64-ssse3 COMP=clang" && \ - echo "" -ifneq ($(SUPPORTED_ARCH), true) - @echo "Specify a supported architecture with the ARCH option for more details" - @echo "" -endif - - -.PHONY: help analyze build profile-build strip install clean net \ - objclean profileclean config-sanity \ - icx-profile-use icx-profile-make \ - gcc-profile-use gcc-profile-make \ - clang-profile-use clang-profile-make 
FORCE \ - format analyze - -analyze: net config-sanity objclean - $(MAKE) -k ARCH=$(ARCH) COMP=$(COMP) $(OBJS) - -build: net config-sanity - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all - -profile-build: net config-sanity objclean profileclean - @echo "" - @echo "Step 1/4. Building instrumented executable ..." - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) - @echo "" - @echo "Step 2/4. Running benchmark for pgo-build ..." - $(PGOBENCH) > PGOBENCH.out 2>&1 - tail -n 4 PGOBENCH.out - @echo "" - @echo "Step 3/4. Building optimized executable ..." - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use) - @echo "" - @echo "Step 4/4. Deleting profile data ..." - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean - -strip: - $(STRIP) $(EXE) - -install: - -mkdir -p -m 755 $(BINDIR) - -cp $(EXE) $(BINDIR) - $(STRIP) $(BINDIR)/$(EXE) - -# clean all -clean: objclean profileclean - @rm -f .depend *~ core - -# clean binaries and objects -objclean: - @rm -f stockfish stockfish.exe *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o - -# clean auxiliary profiling files -profileclean: - @rm -rf profdir - @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s PGOBENCH.out - @rm -f stockfish.profdata *.profraw - @rm -f stockfish.*args* - @rm -f stockfish.*lt* - @rm -f stockfish.res - @rm -f ./-lstdc++.res - -# evaluation network (nnue) -net: - @$(SHELL) ../scripts/net.sh - -format: - $(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file - -### ========================================================================== -### Section 5. 
Private Targets -### ========================================================================== - -all: $(EXE) .depend - -config-sanity: net - @echo "" - @echo "Config:" && \ - echo "debug: '$(debug)'" && \ - echo "sanitize: '$(sanitize)'" && \ - echo "optimize: '$(optimize)'" && \ - echo "arch: '$(arch)'" && \ - echo "bits: '$(bits)'" && \ - echo "kernel: '$(KERNEL)'" && \ - echo "os: '$(OS)'" && \ - echo "prefetch: '$(prefetch)'" && \ - echo "popcnt: '$(popcnt)'" && \ - echo "pext: '$(pext)'" && \ - echo "sse: '$(sse)'" && \ - echo "mmx: '$(mmx)'" && \ - echo "sse2: '$(sse2)'" && \ - echo "ssse3: '$(ssse3)'" && \ - echo "sse41: '$(sse41)'" && \ - echo "avx2: '$(avx2)'" && \ - echo "avxvnni: '$(avxvnni)'" && \ - echo "avx512: '$(avx512)'" && \ - echo "vnni512: '$(vnni512)'" && \ - echo "avx512icl: '$(avx512icl)'" && \ - echo "altivec: '$(altivec)'" && \ - echo "vsx: '$(vsx)'" && \ - echo "neon: '$(neon)'" && \ - echo "dotprod: '$(dotprod)'" && \ - echo "arm_version: '$(arm_version)'" && \ - echo "lsx: '$(lsx)'" && \ - echo "lasx: '$(lasx)'" && \ - echo "target_windows: '$(target_windows)'" && \ - echo "" && \ - echo "Flags:" && \ - echo "CXX: $(CXX)" && \ - echo "CXXFLAGS: $(CXXFLAGS)" && \ - echo "LDFLAGS: $(LDFLAGS)" && \ - echo "" && \ - echo "Testing config sanity. If this fails, try 'make help' ..." 
&& \ - echo "" && \ - (test "$(debug)" = "yes" || test "$(debug)" = "no") && \ - (test "$(optimize)" = "yes" || test "$(optimize)" = "no") && \ - (test "$(SUPPORTED_ARCH)" = "true") && \ - (test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ - test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \ - test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" || \ - test "$(arch)" = "riscv64" || test "$(arch)" = "loongarch64") && \ - (test "$(bits)" = "32" || test "$(bits)" = "64") && \ - (test "$(prefetch)" = "yes" || test "$(prefetch)" = "no") && \ - (test "$(popcnt)" = "yes" || test "$(popcnt)" = "no") && \ - (test "$(pext)" = "yes" || test "$(pext)" = "no") && \ - (test "$(sse)" = "yes" || test "$(sse)" = "no") && \ - (test "$(mmx)" = "yes" || test "$(mmx)" = "no") && \ - (test "$(sse2)" = "yes" || test "$(sse2)" = "no") && \ - (test "$(ssse3)" = "yes" || test "$(ssse3)" = "no") && \ - (test "$(sse41)" = "yes" || test "$(sse41)" = "no") && \ - (test "$(avx2)" = "yes" || test "$(avx2)" = "no") && \ - (test "$(avx512)" = "yes" || test "$(avx512)" = "no") && \ - (test "$(vnni512)" = "yes" || test "$(vnni512)" = "no") && \ - (test "$(avx512icl)" = "yes" || test "$(avx512icl)" = "no") && \ - (test "$(altivec)" = "yes" || test "$(altivec)" = "no") && \ - (test "$(vsx)" = "yes" || test "$(vsx)" = "no") && \ - (test "$(neon)" = "yes" || test "$(neon)" = "no") && \ - (test "$(lsx)" = "yes" || test "$(lsx)" = "no") && \ - (test "$(lasx)" = "yes" || test "$(lasx)" = "no") && \ - (test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || \ - test "$(comp)" = "clang" || test "$(comp)" = "armv7a-linux-androideabi16-clang" || \ - test "$(comp)" = "aarch64-linux-android21-clang") - -$(EXE): $(OBJS) - +$(CXX) -o $@ $(OBJS) $(LDFLAGS) - -# Force recompilation to ensure version info is up-to-date -misc.o: FORCE -FORCE: - -clang-profile-make: - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - 
EXTRACXXFLAGS='-fprofile-generate ' \ - EXTRALDFLAGS=' -fprofile-generate' \ - all - -clang-profile-use: - $(XCRUN) $(LLVM_PROFDATA) merge -output=stockfish.profdata *.profraw - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-fprofile-use=stockfish.profdata' \ - EXTRALDFLAGS='-fprofile-use ' \ - all - -gcc-profile-make: - @mkdir -p profdir - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-fprofile-generate=profdir' \ - EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \ - EXTRALDFLAGS='-lgcov' \ - all - -gcc-profile-use: - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-fprofile-use=profdir -fno-peel-loops -fno-tracer' \ - EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \ - EXTRALDFLAGS='-lgcov' \ - all - -icx-profile-make: - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-fprofile-instr-generate ' \ - EXTRALDFLAGS=' -fprofile-instr-generate' \ - all - -icx-profile-use: - $(XCRUN) $(LLVM_PROFDATA) merge -output=stockfish.profdata *.profraw - $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ - EXTRALDFLAGS='-fprofile-use ' \ - all - -.depend: $(SRCS) - -@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null - -ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net objclean profileclean format config-sanity)) --include .depend -endif diff --git a/src/benchmark.cpp b/src/benchmark.cpp deleted file mode 100644 index 03bf10ae1cb3ffd95c8e087d22e5deb0f4674fdc..0000000000000000000000000000000000000000 --- a/src/benchmark.cpp +++ /dev/null @@ -1,516 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "benchmark.h" -#include "numa.h" - -#include -#include -#include -#include - -namespace { - -// clang-format off -const std::vector Defaults = { - "setoption name UCI_Chess960 value false", - "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1", - "r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 10", - "8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - - 0 11", - "4rrk1/pp1n3p/3q2pQ/2p1pb2/2PP4/2P3N1/P2B2PP/4RRK1 b - - 7 19", - "rq3rk1/ppp2ppp/1bnpb3/3N2B1/3NP3/7P/PPPQ1PP1/2KR3R w - - 7 14 moves d4e6", - "r1bq1r1k/1pp1n1pp/1p1p4/4p2Q/4Pp2/1BNP4/PPP2PPP/3R1RK1 w - - 2 14 moves g2g4", - "r3r1k1/2p2ppp/p1p1bn2/8/1q2P3/2NPQN2/PPP3PP/R4RK1 b - - 2 15", - "r1bbk1nr/pp3p1p/2n5/1N4p1/2Np1B2/8/PPP2PPP/2KR1B1R w kq - 0 13", - "r1bq1rk1/ppp1nppp/4n3/3p3Q/3P4/1BP1B3/PP1N2PP/R4RK1 w - - 1 16", - "4r1k1/r1q2ppp/ppp2n2/4P3/5Rb1/1N1BQ3/PPP3PP/R5K1 w - - 1 17", - "2rqkb1r/ppp2p2/2npb1p1/1N1Nn2p/2P1PP2/8/PP2B1PP/R1BQK2R b KQ - 0 11", - "r1bq1r1k/b1p1npp1/p2p3p/1p6/3PP3/1B2NN2/PP3PPP/R2Q1RK1 w - - 1 16", - "3r1rk1/p5pp/bpp1pp2/8/q1PP1P2/b3P3/P2NQRPP/1R2B1K1 b - - 6 22", - "r1q2rk1/2p1bppp/2Pp4/p6b/Q1PNp3/4B3/PP1R1PPP/2K4R w - - 2 18", - "4k2r/1pb2ppp/1p2p3/1R1p4/3P4/2r1PN2/P4PPP/1R4K1 b - - 3 22", - "3q2k1/pb3p1p/4pbp1/2r5/PpN2N2/1P2P2P/5PP1/Q2R2K1 b - - 4 26", - "6k1/6p1/6Pp/ppp5/3pn2P/1P3K2/1PP2P2/3N4 b - - 0 1", - "3b4/5kp1/1p1p1p1p/pP1PpP1P/P1P1P3/3KN3/8/8 w - - 0 1", - "2K5/p7/7P/5pR1/8/5k2/r7/8 w - - 0 1 moves g5g6 f3e3 g6g5 e3f3", - "8/6pk/1p6/8/PP3p1p/5P2/4KP1q/3Q4 w - - 0 1", - "7k/3p2pp/4q3/8/4Q3/5Kp1/P6b/8 w - - 0 1", - "8/2p5/8/2kPKp1p/2p4P/2P5/3P4/8 w - - 0 1", - "8/1p3pp1/7p/5P1P/2k3P1/8/2K2P2/8 w - 
- 0 1", - "8/pp2r1k1/2p1p3/3pP2p/1P1P1P1P/P5KR/8/8 w - - 0 1", - "8/3p4/p1bk3p/Pp6/1Kp1PpPp/2P2P1P/2P5/5B2 b - - 0 1", - "5k2/7R/4P2p/5K2/p1r2P1p/8/8/8 b - - 0 1", - "6k1/6p1/P6p/r1N5/5p2/7P/1b3PP1/4R1K1 w - - 0 1", - "1r3k2/4q3/2Pp3b/3Bp3/2Q2p2/1p1P2P1/1P2KP2/3N4 w - - 0 1", - "6k1/4pp1p/3p2p1/P1pPb3/R7/1r2P1PP/3B1P2/6K1 w - - 0 1", - "8/3p3B/5p2/5P2/p7/PP5b/k7/6K1 w - - 0 1", - "5rk1/q6p/2p3bR/1pPp1rP1/1P1Pp3/P3B1Q1/1K3P2/R7 w - - 93 90", - "4rrk1/1p1nq3/p7/2p1P1pp/3P2bp/3Q1Bn1/PPPB4/1K2R1NR w - - 40 21", - "r3k2r/3nnpbp/q2pp1p1/p7/Pp1PPPP1/4BNN1/1P5P/R2Q1RK1 w kq - 0 16", - "3Qb1k1/1r2ppb1/pN1n2q1/Pp1Pp1Pr/4P2p/4BP2/4B1R1/1R5K b - - 11 40", - "4k3/3q1r2/1N2r1b1/3ppN2/2nPP3/1B1R2n1/2R1Q3/3K4 w - - 5 1", - "1r6/1P4bk/3qr1p1/N6p/3pp2P/6R1/3Q1PP1/1R4K1 w - - 1 42", - - // Positions with high numbers of changed threats - "k7/2n1n3/1nbNbn2/2NbRBn1/1nbRQR2/2NBRBN1/3N1N2/7K w - - 0 1", - "K7/8/8/BNQNQNB1/N5N1/R1Q1q2r/n5n1/bnqnqnbk w - - 0 1", - - // 5-man positions - "8/8/8/8/5kp1/P7/8/1K1N4 w - - 0 1", // Kc2 - mate - "8/8/8/5N2/8/p7/8/2NK3k w - - 0 1", // Na2 - mate - "8/3k4/8/8/8/4B3/4KB2/2B5 w - - 0 1", // draw - - // 6-man positions - "8/8/1P6/5pr1/8/4R3/7k/2K5 w - - 0 1", // Re5 - mate - "8/2p4P/8/kr6/6R1/8/8/1K6 w - - 0 1", // Ka2 - mate - "8/8/3P3k/8/1p6/8/1P6/1K3n2 b - - 0 1", // Nd2 - draw - - // 7-man positions - "8/R7/2q5/8/6k1/8/1P5p/K6R w - - 0 124", // Draw - - // Mate and stalemate positions - "6k1/3b3r/1p1p4/p1n2p2/1PPNpP1q/P3Q1p1/1R1RB1P1/5K2 b - - 0 1", - "r2r1n2/pp2bk2/2p1p2p/3q4/3PN1QP/2P3R1/P4PP1/5RK1 w - - 0 1", - "8/8/8/8/8/6k1/6p1/6K1 w - -", - "7k/7P/6K1/8/3B4/8/8/8 b - -", - - // Chess 960 - "setoption name UCI_Chess960 value true", - "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6", - "nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1", - "setoption name UCI_Chess960 value false" -}; -// clang-format on - -// clang-format off -// human-randomly picked 5 games with <60 moves from 
-// https://tests.stockfishchess.org/tests/view/665c71f9fd45fb0f907c21e0 -// only moves for one side -const std::vector> BenchmarkPositions = { - { - "rnbq1k1r/ppp1bppp/4pn2/8/2B5/2NP1N2/PPP2PPP/R1BQR1K1 b - - 2 8", - "rnbq1k1r/pp2bppp/4pn2/2p5/2B2B2/2NP1N2/PPP2PPP/R2QR1K1 b - - 1 9", - "r1bq1k1r/pp2bppp/2n1pn2/2p5/2B1NB2/3P1N2/PPP2PPP/R2QR1K1 b - - 3 10", - "r1bq1k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/R2QR1K1 b - - 0 11", - "r1b2k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/3RR1K1 b - - 0 12", - "r1b1k2r/pp2bppp/2n1p3/2p5/2B1PB2/2P2N2/PP3PPP/3RR1K1 b - - 0 13", - "r1b1k2r/1p2bppp/p1n1p3/2p5/4PB2/2P2N2/PP2BPPP/3RR1K1 b - - 1 14", - "r1b1k2r/4bppp/p1n1p3/1pp5/P3PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 15", - "r1b1k2r/4bppp/p1n1p3/1P6/2p1PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 16", - "r1b1k2r/4bppp/2n1p3/1p6/2p1PB2/1PP2N2/4BPPP/3RR1K1 b - - 0 17", - "r3k2r/3bbppp/2n1p3/1p6/2P1PB2/2P2N2/4BPPP/3RR1K1 b - - 0 18", - "r3k2r/3bbppp/2n1p3/8/1pP1P3/2P2N2/3BBPPP/3RR1K1 b - - 1 19", - "1r2k2r/3bbppp/2n1p3/8/1pPNP3/2P5/3BBPPP/3RR1K1 b - - 3 20", - "1r2k2r/3bbppp/2n1p3/8/2PNP3/2B5/4BPPP/3RR1K1 b - - 0 21", - "1r2k2r/3bb1pp/2n1pp2/1N6/2P1P3/2B5/4BPPP/3RR1K1 b - - 1 22", - "1r2k2r/3b2pp/2n1pp2/1N6/1BP1P3/8/4BPPP/3RR1K1 b - - 0 23", - "1r2k2r/3b2pp/4pp2/1N6/1nP1P3/8/3RBPPP/4R1K1 b - - 1 24", - "1r5r/3bk1pp/4pp2/1N6/1nP1PP2/8/3RB1PP/4R1K1 b - - 0 25", - "1r5r/3bk1pp/2n1pp2/1N6/2P1PP2/8/3RBKPP/4R3 b - - 2 26", - "1r5r/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/4R3 b - - 0 27", - "1r1r4/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/R7 b - - 2 28", - "1r1r4/N3k1pp/2n1bp2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 4 29", - "1r1r4/3bk1pp/2N2p2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 0 30", - "1r1R4/4k1pp/2b2p2/4p3/2P1PP2/6P1/4BK1P/R7 b - - 0 31", - "3r4/4k1pp/2b2p2/4P3/2P1P3/6P1/4BK1P/R7 b - - 0 32", - "3r4/R3k1pp/2b5/4p3/2P1P3/6P1/4BK1P/8 b - - 1 33", - "8/3rk1pp/2b5/R3p3/2P1P3/6P1/4BK1P/8 b - - 3 34", - "8/3r2pp/2bk4/R1P1p3/4P3/6P1/4BK1P/8 b - - 0 35", - "8/2kr2pp/2b5/R1P1p3/4P3/4K1P1/4B2P/8 b - - 2 36", - 
"1k6/3r2pp/2b5/RBP1p3/4P3/4K1P1/7P/8 b - - 4 37", - "8/1k1r2pp/2b5/R1P1p3/4P3/3BK1P1/7P/8 b - - 6 38", - "1k6/3r2pp/2b5/2P1p3/4P3/3BK1P1/7P/R7 b - - 8 39", - "1k6/r5pp/2b5/2P1p3/4P3/3BK1P1/7P/5R2 b - - 10 40", - "1k3R2/6pp/2b5/2P1p3/4P3/r2BK1P1/7P/8 b - - 12 41", - "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 14 42", - "5R2/2k3pp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 16 43", - "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 18 44", - "5R2/2k3pp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 20 45", - "8/2k2Rpp/2b5/2P1p3/4P3/r2B1KP1/7P/8 b - - 22 46", - "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 24 47", - "3k4/5Rpp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 26 48", - "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 28 49", - "3k4/5Rpp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 30 50", - "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 32 51", - "3k4/5Rpp/2b5/2P1p3/4P3/2KB2P1/r6P/8 b - - 34 52", - "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/2K4P/8 b - - 36 53", - "3k4/5Rpp/2b5/2P1p3/4P3/1K1B2P1/r6P/8 b - - 38 54", - "3k4/6Rp/2b5/2P1p3/4P3/1K1B2P1/7r/8 b - - 0 55", - "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 1 56", - "8/2k3R1/2b4p/2P1p3/4P3/1K1B2P1/7r/8 b - - 3 57", - "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 5 58", - "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/7r/8 b - - 7 59", - "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 9 60", - "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/6r1/8 b - - 11 61", - "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 13 62", - "8/2k5/2b3Rp/2P1p3/2K1P3/3B2P1/6r1/8 b - - 15 63", - "4b3/2k3R1/7p/2P1p3/2K1P3/3B2P1/6r1/8 b - - 17 64", - }, - { - "r1bqkbnr/npp1pppp/p7/3P4/4pB2/2N5/PPP2PPP/R2QKBNR w KQkq - 1 6", - "r1bqkb1r/npp1pppp/p4n2/3P4/4pB2/2N5/PPP1QPPP/R3KBNR w KQkq - 3 7", - "r2qkb1r/npp1pppp/p4n2/3P1b2/4pB2/2N5/PPP1QPPP/2KR1BNR w kq - 5 8", - "r2qkb1r/1pp1pppp/p4n2/1n1P1b2/4pB2/2N4P/PPP1QPP1/2KR1BNR w kq - 1 9", - "r2qkb1r/1pp1pppp/5n2/1p1P1b2/4pB2/7P/PPP1QPP1/2KR1BNR w kq - 0 10", - "r2qkb1r/1ppbpppp/5n2/1Q1P4/4pB2/7P/PPP2PP1/2KR1BNR w kq - 1 11", - "3qkb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/2KR1BNR w k - 0 12", - 
"q3kb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/1K1R1BNR w k - 2 13", - "r3kb1r/2pbpppp/5n2/3P4/4pB2/7P/1PP2PP1/1K1R1BNR w k - 0 14", - "r3kb1r/2Bb1ppp/4pn2/3P4/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 15", - "r3kb1r/2Bb2pp/4pn2/8/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 16", - "r3k2r/2Bb2pp/4pn2/2b5/4p3/7P/1PP1NPP1/1K1R1B1R w k - 2 17", - "r6r/2Bbk1pp/4pn2/2b5/3Np3/7P/1PP2PP1/1K1R1B1R w - - 4 18", - "r6r/b2bk1pp/4pn2/4B3/3Np3/7P/1PP2PP1/1K1R1B1R w - - 6 19", - "r1r5/b2bk1pp/4pn2/4B3/2BNp3/7P/1PP2PP1/1K1R3R w - - 8 20", - "r7/b2bk1pp/4pn2/2r1B3/2BNp3/1P5P/2P2PP1/1K1R3R w - - 1 21", - "rb6/3bk1pp/4pn2/2r1B3/2BNpP2/1P5P/2P3P1/1K1R3R w - - 1 22", - "1r6/3bk1pp/4pn2/2r5/2BNpP2/1P5P/2P3P1/1K1R3R w - - 0 23", - "1r6/3bk1p1/4pn1p/2r5/2BNpP2/1P5P/2P3P1/2KR3R w - - 0 24", - "8/3bk1p1/1r2pn1p/2r5/2BNpP1P/1P6/2P3P1/2KR3R w - - 1 25", - "8/3bk3/1r2pnpp/2r5/2BNpP1P/1P6/2P3P1/2K1R2R w - - 0 26", - "2b5/4k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R2R w - - 1 27", - "8/1b2k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R1R1 w - - 3 28", - "8/1b1nk3/1r2p1pp/2r5/2BNpPPP/1P6/2P5/2K1R1R1 w - - 1 29", - "8/1b2k3/1r2p1pp/2r1nP2/2BNp1PP/1P6/2P5/2K1R1R1 w - - 1 30", - "8/1b2k3/1r2p1p1/2r1nPp1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 31", - "8/1b2k3/1r2p1n1/2r3p1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 32", - "8/1b2k3/1r2p1n1/6r1/2BNp2P/1P6/2P5/2K1R3 w - - 0 33", - "8/1b2k3/1r2p3/4n1P1/2BNp3/1P6/2P5/2K1R3 w - - 1 34", - "8/1b2k3/1r2p3/4n1P1/2BN4/1P2p3/2P5/2K4R w - - 0 35", - "8/1b2k3/1r2p2R/6P1/2nN4/1P2p3/2P5/2K5 w - - 0 36", - "8/1b2k3/3rp2R/6P1/2PN4/4p3/2P5/2K5 w - - 1 37", - "8/4k3/3rp2R/6P1/2PN4/2P1p3/6b1/2K5 w - - 1 38", - "8/4k3/r3p2R/2P3P1/3N4/2P1p3/6b1/2K5 w - - 1 39", - "8/3k4/r3p2R/2P2NP1/8/2P1p3/6b1/2K5 w - - 3 40", - "8/3k4/4p2R/2P3P1/8/2P1N3/6b1/r1K5 w - - 1 41", - "8/3k4/4p2R/2P3P1/8/2P1N3/3K2b1/6r1 w - - 3 42", - "8/3k4/4p2R/2P3P1/8/2PKNb2/8/6r1 w - - 5 43", - "8/4k3/4p1R1/2P3P1/8/2PKNb2/8/6r1 w - - 7 44", - "8/4k3/4p1R1/2P3P1/3K4/2P1N3/8/6rb w - - 9 45", - "8/3k4/4p1R1/2P1K1P1/8/2P1N3/8/6rb w - - 11 46", - 
"8/3k4/4p1R1/2P3P1/5K2/2P1N3/8/4r2b w - - 13 47", - "8/3k4/2b1p2R/2P3P1/5K2/2P1N3/8/4r3 w - - 15 48", - "8/3k4/2b1p3/2P3P1/5K2/2P1N2R/8/6r1 w - - 17 49", - "2k5/7R/2b1p3/2P3P1/5K2/2P1N3/8/6r1 w - - 19 50", - "2k5/7R/4p3/2P3P1/b1P2K2/4N3/8/6r1 w - - 1 51", - "2k5/3bR3/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 3 52", - "3k4/3b2R1/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 5 53", - "3kb3/6R1/4p1P1/2P5/2P2K2/4N3/8/6r1 w - - 1 54", - "3kb3/6R1/4p1P1/2P5/2P2KN1/8/8/2r5 w - - 3 55", - "3kb3/6R1/4p1P1/2P1N3/2P2K2/8/8/5r2 w - - 5 56", - "3kb3/6R1/4p1P1/2P1N3/2P5/4K3/8/4r3 w - - 7 57", - }, - { - "rnbq1rk1/ppp1npb1/4p1p1/3P3p/3PP3/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 8", - "rnbq1rk1/ppp1npb1/6p1/3pP2p/3P4/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 9", - "rn1q1rk1/ppp1npb1/6p1/3pP2p/3P2b1/2N2N2/PP2BPPP/R1BQR1K1 b - - 2 10", - "r2q1rk1/ppp1npb1/2n3p1/3pP2p/3P2bN/2N5/PP2BPPP/R1BQR1K1 b - - 4 11", - "r4rk1/pppqnpb1/2n3p1/3pP2p/3P2bN/2N4P/PP2BPP1/R1BQR1K1 b - - 0 12", - "r4rk1/pppqnpb1/2n3p1/3pP2p/3P3N/7P/PP2NPP1/R1BQR1K1 b - - 0 13", - "r4rk1/pppq1pb1/2n3p1/3pPN1p/3P4/7P/PP2NPP1/R1BQR1K1 b - - 0 14", - "r4rk1/ppp2pb1/2n3p1/3pPq1p/3P1N2/7P/PP3PP1/R1BQR1K1 b - - 1 15", - "r4rk1/pppq1pb1/2n3p1/3pP2p/P2P1N2/7P/1P3PP1/R1BQR1K1 b - - 0 16", - "r2n1rk1/pppq1pb1/6p1/3pP2p/P2P1N2/R6P/1P3PP1/2BQR1K1 b - - 2 17", - "r4rk1/pppq1pb1/4N1p1/3pP2p/P2P4/R6P/1P3PP1/2BQR1K1 b - - 0 18", - "r4rk1/ppp2pb1/4q1p1/3pP1Bp/P2P4/R6P/1P3PP1/3QR1K1 b - - 1 19", - "r3r1k1/ppp2pb1/4q1p1/3pP1Bp/P2P1P2/R6P/1P4P1/3QR1K1 b - - 0 20", - "r3r1k1/ppp3b1/4qpp1/3pP2p/P2P1P1B/R6P/1P4P1/3QR1K1 b - - 1 21", - "r3r1k1/ppp3b1/4q1p1/3pP2p/P4P1B/R6P/1P4P1/3QR1K1 b - - 0 22", - "r4rk1/ppp3b1/4q1p1/3pP1Bp/P4P2/R6P/1P4P1/3QR1K1 b - - 2 23", - "r4rk1/pp4b1/4q1p1/2ppP1Bp/P4P2/3R3P/1P4P1/3QR1K1 b - - 1 24", - "r4rk1/pp4b1/4q1p1/2p1P1Bp/P2p1PP1/3R3P/1P6/3QR1K1 b - - 0 25", - "r4rk1/pp4b1/4q1p1/2p1P1B1/P2p1PP1/3R4/1P6/3QR1K1 b - - 0 26", - "r5k1/pp3rb1/4q1p1/2p1P1B1/P2p1PP1/6R1/1P6/3QR1K1 b - - 2 27", - "5rk1/pp3rb1/4q1p1/2p1P1B1/P2pRPP1/6R1/1P6/3Q2K1 b - - 4 28", - 
"5rk1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/6R1/1P6/3Q2K1 b - - 0 29", - "4r1k1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 30", - "4r1k1/5rb1/pP2q1p1/2p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 31", - "4r1k1/5rb1/pq4p1/2p1P1B1/3pRPP1/1P4R1/4Q3/6K1 b - - 1 32", - "4r1k1/1r4b1/pq4p1/2p1P1B1/3pRPP1/1P4R1/2Q5/6K1 b - - 3 33", - "4r1k1/1r4b1/1q4p1/p1p1P1B1/3p1PP1/1P4R1/2Q5/4R1K1 b - - 1 34", - "4r1k1/3r2b1/1q4p1/p1p1P1B1/2Qp1PP1/1P4R1/8/4R1K1 b - - 3 35", - "4r1k1/3r2b1/4q1p1/p1p1P1B1/2Qp1PP1/1P4R1/5K2/4R3 b - - 5 36", - "4r1k1/3r2b1/6p1/p1p1P1B1/2Pp1PP1/6R1/5K2/4R3 b - - 0 37", - "4r1k1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/5K2/3R4 b - - 1 38", - "5rk1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/8/3RK3 b - - 3 39", - "5rk1/6b1/6p1/p1p1P1B1/2Pr1PP1/3R4/8/3RK3 b - - 0 40", - "5rk1/3R2b1/6p1/p1p1P1B1/2r2PP1/8/8/3RK3 b - - 1 41", - "5rk1/3R2b1/6p1/p1p1P1B1/4rPP1/8/3K4/3R4 b - - 3 42", - "1r4k1/3R2b1/6p1/p1p1P1B1/4rPP1/2K5/8/3R4 b - - 5 43", - "1r4k1/3R2b1/6p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 7 44", - "1r3bk1/8/3R2p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 9 45", - "1r3bk1/8/6R1/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 0 46", - "1r3b2/5k2/R7/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 2 47", - "5b2/1r3k2/R7/2p1P1B1/p1K2PP1/4r3/8/7R b - - 4 48", - "5b2/5k2/R7/2pKP1B1/pr3PP1/4r3/8/7R b - - 6 49", - "5b2/5k2/R1K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 8 50", - "8/R4kb1/2K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 10 51", - "8/R5b1/2K3k1/2p1PPB1/p2r2P1/4r3/8/7R b - - 0 52", - "8/6R1/2K5/2p1PPk1/p2r2P1/4r3/8/7R b - - 0 53", - "8/6R1/2K5/2p1PP2/p2r1kP1/4r3/8/5R2 b - - 2 54", - "8/6R1/2K2P2/2p1P3/p2r2P1/4r1k1/8/5R2 b - - 0 55", - "8/5PR1/2K5/2p1P3/p2r2P1/4r3/6k1/5R2 b - - 0 56", - }, - { - "rn1qkb1r/p1pbpppp/5n2/8/2pP4/2N5/1PQ1PPPP/R1B1KBNR w KQkq - 0 7", - "r2qkb1r/p1pbpppp/2n2n2/8/2pP4/2N2N2/1PQ1PPPP/R1B1KB1R w KQkq - 2 8", - "r2qkb1r/p1pbpppp/5n2/8/1npPP3/2N2N2/1PQ2PPP/R1B1KB1R w KQkq - 1 9", - "r2qkb1r/p1pb1ppp/4pn2/8/1npPP3/2N2N2/1P3PPP/R1BQKB1R w KQkq - 0 10", - "r2qk2r/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQK2R w KQkq - 1 11", - 
"r2q1rk1/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQ1RK1 w - - 3 12", - "r2q1rk1/2pbbppp/p3pn2/8/1nBPPB2/2N2N2/1P3PPP/R2Q1RK1 w - - 0 13", - "r2q1rk1/2p1bppp/p3pn2/1b6/1nBPPB2/2N2N2/1P3PPP/R2QR1K1 w - - 2 14", - "r2q1rk1/4bppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/5PPP/R2QR1K1 w - - 0 15", - "r4rk1/3qbppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/3Q1PPP/R3R1K1 w - - 2 16", - "r4rk1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/3Q1PP1/R3R1K1 w - - 1 17", - "r3r1k1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/4QPP1/R3R1K1 w - - 3 18", - "r3r1k1/1q1nbppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/4QPP1/3RR1K1 w - - 5 19", - "r3rbk1/1q1n1ppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R1K1 w - - 7 20", - "r3rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R2K w - - 9 21", - "2r1rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/1R5K w - - 11 22", - "2r1rbk1/1q4pp/pnp1pp2/1b6/1nBPPB2/1PN2N1P/4QPP1/1R1R3K w - - 0 23", - "2r1rbk1/5qpp/pnp1pp2/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R3K w - - 2 24", - "2r1rbk1/5qp1/pnp1pp1p/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R2K1 w - - 0 25", - "2r1rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/n3QPP1/1R1R2K1 w - - 0 26", - "r3rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/1R1R2K1 w - - 1 27", - "rr3bk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/R2R2K1 w - - 3 28", - "rr2qbk1/6p1/pnp1pp1p/1b6/2BPP3/1P2BN1P/4QPP1/R2R2K1 w - - 5 29", - "rr2qbk1/6p1/1np1pp1p/pb6/2BPP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 30", - "rr2qbk1/6p1/1n2pp1p/pp6/3PP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 31", - "rr2qbk1/6p1/1n2pp1p/1p1P4/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 0 32", - "rr2qbk1/3n2p1/3Ppp1p/1p6/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 1 33", - "rr3bk1/3n2p1/3Ppp1p/1p5q/pP2P3/3QBN1P/5PP1/R2R2K1 w - - 1 34", - "rr3bk1/3n2p1/3Ppp1p/1p5q/1P2P3/p2QBN1P/5PP1/2RR2K1 w - - 0 35", - "1r3bk1/3n2p1/r2Ppp1p/1p5q/1P2P3/pQ2BN1P/5PP1/2RR2K1 w - - 2 36", - "1r2qbk1/2Rn2p1/r2Ppp1p/1p6/1P2P3/pQ2BN1P/5PP1/3R2K1 w - - 4 37", - "1r2qbk1/2Rn2p1/r2Ppp1p/1pB5/1P2P3/1Q3N1P/p4PP1/3R2K1 w - - 0 38", - "1r2q1k1/2Rn2p1/r2bpp1p/1pB5/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 39", - "1r2q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 
40", - "2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 1 41", - "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 3 42", - "2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 5 43", - "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 7 44", - "1rq3k1/R2n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 9 45", - "2q3k1/Rr1n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 11 46", - "Rrq3k1/3n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 13 47", - }, - { - "rn1qkb1r/1pp2ppp/p4p2/3p1b2/5P2/1P2PN2/P1PP2PP/RN1QKB1R b KQkq - 1 6", - "r2qkb1r/1pp2ppp/p1n2p2/3p1b2/3P1P2/1P2PN2/P1P3PP/RN1QKB1R b KQkq - 0 7", - "r2qkb1r/1pp2ppp/p4p2/3p1b2/1n1P1P2/1P1BPN2/P1P3PP/RN1QK2R b KQkq - 2 8", - "r2qkb1r/1pp2ppp/p4p2/3p1b2/3P1P2/1P1PPN2/P5PP/RN1QK2R b KQkq - 0 9", - "r2qk2r/1pp2ppp/p2b1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2QK2R b KQkq - 2 10", - "r2qk2r/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2Q1RK1 b kq - 1 11", - "r2q1rk1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P2Q2PP/R4RK1 b - - 3 12", - "r2qr1k1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1P1PPN2/P2QN1PP/R4RK1 b - - 5 13", - "r3r1k1/1p3ppp/pqpb1p2/3p1b2/3P1P2/1P1PPNN1/P2Q2PP/R4RK1 b - - 7 14", - "r3r1k1/1p3ppp/pqp2p2/3p1b2/1b1P1P2/1P1PPNN1/P1Q3PP/R4RK1 b - - 9 15", - "r3r1k1/1p1b1ppp/pqp2p2/3p4/1b1P1P2/1P1PPNN1/P4QPP/R4RK1 b - - 11 16", - "2r1r1k1/1p1b1ppp/pqp2p2/3p4/1b1PPP2/1P1P1NN1/P4QPP/R4RK1 b - - 0 17", - "2r1r1k1/1p1b1ppp/pq3p2/2pp4/1b1PPP2/PP1P1NN1/5QPP/R4RK1 b - - 0 18", - "2r1r1k1/1p1b1ppp/pq3p2/2Pp4/4PP2/PPbP1NN1/5QPP/R4RK1 b - - 0 19", - "2r1r1k1/1p1b1ppp/p4p2/2Pp4/4PP2/PqbP1NN1/5QPP/RR4K1 b - - 1 20", - "2r1r1k1/1p1b1ppp/p4p2/2Pp4/q3PP2/P1bP1NN1/R4QPP/1R4K1 b - - 3 21", - "2r1r1k1/1p3ppp/p4p2/1bPP4/q4P2/P1bP1NN1/R4QPP/1R4K1 b - - 0 22", - "2r1r1k1/1p3ppp/p4p2/2PP4/q4P2/P1bb1NN1/R4QPP/2R3K1 b - - 1 23", - "2r1r1k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R3K1 b - - 0 24", - "2rr2k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R4K b - - 2 25", - "2rr2k1/1p3ppp/p2P1p2/2Q5/5P2/P1bb1NN1/R5PP/2R4K b - - 0 26", - 
"3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1bb1N2/R3N1PP/2R4K b - - 1 27", - "3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1b2N2/4R1PP/2R4K b - - 0 28", - "3r2k1/1p3ppp/p2P1p2/2r5/1b3P2/P4N2/4R1PP/3R3K b - - 2 29", - "3r2k1/1p2Rppp/p2P1p2/b1r5/5P2/P4N2/6PP/3R3K b - - 4 30", - "3r2k1/1R3ppp/p1rP1p2/b7/5P2/P4N2/6PP/3R3K b - - 0 31", - "3r2k1/1R3ppp/p2R1p2/b7/5P2/P4N2/6PP/7K b - - 0 32", - "6k1/1R3ppp/p2r1p2/b7/5P2/P4NP1/7P/7K b - - 0 33", - "6k1/1R3p1p/p2r1pp1/b7/5P1P/P4NP1/8/7K b - - 0 34", - "6k1/3R1p1p/pr3pp1/b7/5P1P/P4NP1/8/7K b - - 2 35", - "6k1/5p2/pr3pp1/b2R3p/5P1P/P4NP1/8/7K b - - 1 36", - "6k1/5p2/pr3pp1/7p/5P1P/P1bR1NP1/8/7K b - - 3 37", - "6k1/5p2/p1r2pp1/7p/5P1P/P1bR1NP1/6K1/8 b - - 5 38", - "6k1/5p2/p1r2pp1/b2R3p/5P1P/P4NP1/6K1/8 b - - 7 39", - "6k1/5p2/p4pp1/b2R3p/5P1P/P4NPK/2r5/8 b - - 9 40", - "6k1/2b2p2/p4pp1/7p/5P1P/P2R1NPK/2r5/8 b - - 11 41", - "6k1/2b2p2/5pp1/p6p/3N1P1P/P2R2PK/2r5/8 b - - 1 42", - "6k1/2b2p2/5pp1/p6p/3N1P1P/P1R3PK/r7/8 b - - 3 43", - "6k1/5p2/1b3pp1/p6p/5P1P/P1R3PK/r1N5/8 b - - 5 44", - "8/5pk1/1bR2pp1/p6p/5P1P/P5PK/r1N5/8 b - - 7 45", - "3b4/5pk1/2R2pp1/p4P1p/7P/P5PK/r1N5/8 b - - 0 46", - "8/4bpk1/2R2pp1/p4P1p/6PP/P6K/r1N5/8 b - - 0 47", - "8/5pk1/2R2pP1/p6p/6PP/b6K/r1N5/8 b - - 0 48", - "8/6k1/2R2pp1/p6P/7P/b6K/r1N5/8 b - - 0 49", - "8/6k1/2R2p2/p6p/7P/b5K1/r1N5/8 b - - 1 50", - "8/8/2R2pk1/p6p/7P/b4K2/r1N5/8 b - - 3 51", - "8/8/2R2pk1/p6p/7P/4NK2/rb6/8 b - - 5 52", - "2R5/8/5pk1/7p/p6P/4NK2/rb6/8 b - - 1 53", - "6R1/8/5pk1/7p/p6P/4NK2/1b6/r7 b - - 3 54", - "R7/5k2/5p2/7p/p6P/4NK2/1b6/r7 b - - 5 55", - "R7/5k2/5p2/7p/7P/p3N3/1b2K3/r7 b - - 1 56", - "8/R4k2/5p2/7p/7P/p3N3/1b2K3/7r b - - 3 57", - "8/8/5pk1/7p/R6P/p3N3/1b2K3/7r b - - 5 58", - "8/8/5pk1/7p/R6P/p7/4K3/2bN3r b - - 7 59", - "8/8/5pk1/7p/R6P/p7/4KN1r/2b5 b - - 9 60", - "8/8/5pk1/7p/R6P/p3K3/1b3N1r/8 b - - 11 61", - "8/8/R4pk1/7p/7P/p1b1K3/5N1r/8 b - - 13 62", - "8/8/5pk1/7p/7P/2b1K3/R4N1r/8 b - - 0 63", - "8/8/5pk1/7p/3K3P/8/R4N1r/4b3 b - - 2 64", - } -}; -// clang-format on - -} // namespace - 
-namespace Stockfish::Benchmark { - -// Builds a list of UCI commands to be run by bench. There -// are five parameters: TT size in MB, number of search threads that -// should be used, the limit value spent for each position, a file name -// where to look for positions in FEN format, and the type of the limit: -// depth, perft, nodes and movetime (in milliseconds). Examples: -// -// bench : search default positions up to depth 13 -// bench 64 1 15 : search default positions up to depth 15 (TT = 64MB) -// bench 64 1 100000 default nodes : search default positions for 100K nodes each -// bench 64 4 5000 current movetime : search current position with 4 threads for 5 sec -// bench 16 1 5 blah perft : run a perft 5 on positions in file "blah" -std::vector setup_bench(const std::string& currentFen, std::istream& is) { - - std::vector fens, list; - std::string go, token; - - // Assign default values to missing arguments - std::string ttSize = (is >> token) ? token : "16"; - std::string threads = (is >> token) ? token : "1"; - std::string limit = (is >> token) ? token : "13"; - std::string fenFile = (is >> token) ? token : "default"; - std::string limitType = (is >> token) ? token : "depth"; - - go = limitType == "eval" ? 
"eval" : "go " + limitType + " " + limit; - - if (fenFile == "default") - fens = Defaults; - - else if (fenFile == "current") - fens.push_back(currentFen); - - else - { - std::string fen; - std::ifstream file(fenFile); - - if (!file.is_open()) - { - std::cerr << "Unable to open file " << fenFile << std::endl; - exit(EXIT_FAILURE); - } - - while (getline(file, fen)) - if (!fen.empty()) - fens.push_back(fen); - - file.close(); - } - - list.emplace_back("setoption name Threads value " + threads); - list.emplace_back("setoption name Hash value " + ttSize); - list.emplace_back("ucinewgame"); - - for (const std::string& fen : fens) - if (fen.find("setoption") != std::string::npos) - list.emplace_back(fen); - else - { - list.emplace_back("position fen " + fen); - list.emplace_back(go); - } - - return list; -} - -BenchmarkSetup setup_benchmark(std::istream& is) { - // TT_SIZE_PER_THREAD is chosen such that roughly half of the hash is used all positions - // for the current sequence have been searched. 
- static constexpr int TT_SIZE_PER_THREAD = 128; - - static constexpr int DEFAULT_DURATION_S = 150; - - BenchmarkSetup setup{}; - - // Assign default values to missing arguments - int desiredTimeS; - - if (!(is >> setup.threads)) - setup.threads = int(get_hardware_concurrency()); - else - setup.originalInvocation += std::to_string(setup.threads); - - if (!(is >> setup.ttSize)) - setup.ttSize = TT_SIZE_PER_THREAD * setup.threads; - else - setup.originalInvocation += " " + std::to_string(setup.ttSize); - - if (!(is >> desiredTimeS)) - desiredTimeS = DEFAULT_DURATION_S; - else - setup.originalInvocation += " " + std::to_string(desiredTimeS); - - setup.filledInvocation += std::to_string(setup.threads) + " " + std::to_string(setup.ttSize) - + " " + std::to_string(desiredTimeS); - - auto getCorrectedTime = [&](int ply) { - // time per move is fit roughly based on LTC games - // seconds = 50/{ply+15} - // ms = 50000/{ply+15} - // with this fit 10th move gets 2000ms - // adjust for desired 10th move time - return 50000.0 / (static_cast(ply) + 15.0); - }; - - float totalTime = 0; - for (const auto& game : BenchmarkPositions) - { - int ply = 1; - for (int i = 0; i < static_cast(game.size()); ++i) - { - const float correctedTime = float(getCorrectedTime(ply)); - totalTime += correctedTime; - ply += 1; - } - } - - float timeScaleFactor = static_cast(desiredTimeS * 1000) / totalTime; - - for (const auto& game : BenchmarkPositions) - { - setup.commands.emplace_back("ucinewgame"); - int ply = 1; - for (const std::string& fen : game) - { - setup.commands.emplace_back("position fen " + fen); - - const int correctedTime = static_cast(getCorrectedTime(ply) * timeScaleFactor); - setup.commands.emplace_back("go movetime " + std::to_string(correctedTime)); - - ply += 1; - } - } - - return setup; -} - -} // namespace Stockfish diff --git a/src/benchmark.h b/src/benchmark.h deleted file mode 100644 index a6606e78cad6feb990fc4b84ddc7fe7e31d1dc5f..0000000000000000000000000000000000000000 
--- a/src/benchmark.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef BENCHMARK_H_INCLUDED -#define BENCHMARK_H_INCLUDED - -#include -#include -#include - -namespace Stockfish::Benchmark { - -std::vector setup_bench(const std::string&, std::istream&); - -struct BenchmarkSetup { - int ttSize; - int threads; - std::vector commands; - std::string originalInvocation; - std::string filledInvocation; -}; - -BenchmarkSetup setup_benchmark(std::istream&); - -} // namespace Stockfish - -#endif // #ifndef BENCHMARK_H_INCLUDED diff --git a/src/bitboard.cpp b/src/bitboard.cpp deleted file mode 100644 index 0861222cf0e108380ceb296f0449c37a49ce9d8b..0000000000000000000000000000000000000000 --- a/src/bitboard.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "bitboard.h" - -#include -#include -#include - -#include "misc.h" - -namespace Stockfish { - -uint8_t PopCnt16[1 << 16]; -uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; - -Bitboard LineBB[SQUARE_NB][SQUARE_NB]; -Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; -Bitboard RayPassBB[SQUARE_NB][SQUARE_NB]; - -alignas(64) Magic Magics[SQUARE_NB][2]; - -namespace { - -Bitboard RookTable[0x19000]; // To store rook attacks -Bitboard BishopTable[0x1480]; // To store bishop attacks - -void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]); -} - -// Returns an ASCII representation of a bitboard suitable -// to be printed to standard output. Useful for debugging. -std::string Bitboards::pretty(Bitboard b) { - - std::string s = "+---+---+---+---+---+---+---+---+\n"; - - for (Rank r = RANK_8;; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - s += b & make_square(f, r) ? "| X " : "| "; - - s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n"; - - if (r == RANK_1) - break; - } - s += " a b c d e f g h\n"; - - return s; -} - - -// Initializes various bitboard tables. It is called at -// startup and relies on global objects to be already zero-initialized. 
-void Bitboards::init() { - - for (unsigned i = 0; i < (1 << 16); ++i) - PopCnt16[i] = uint8_t(std::bitset<16>(i).count()); - - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) - SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); - - init_magics(ROOK, RookTable, Magics); - init_magics(BISHOP, BishopTable, Magics); - - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - { - for (PieceType pt : {BISHOP, ROOK}) - for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) - { - if (PseudoAttacks[pt][s1] & s2) - { - LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2; - BetweenBB[s1][s2] = - (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1))); - RayPassBB[s1][s2] = - attacks_bb(pt, s1, 0) & (attacks_bb(pt, s2, square_bb(s1)) | s2); - } - BetweenBB[s1][s2] |= s2; - } - } -} - -namespace { -// Computes all rook and bishop attacks at startup. Magic -// bitboards are used to look up attacks of sliding pieces. As a reference see -// https://www.chessprogramming.org/Magic_Bitboards. In particular, here we use -// the so called "fancy" approach. -void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]) { - -#ifndef USE_PEXT - // Optimal PRNG seeds to pick the correct magics in the shortest time - int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020}, - {728, 10316, 55013, 32803, 12281, 15100, 16645, 255}}; - - Bitboard occupancy[4096]; - int epoch[4096] = {}, cnt = 0; -#endif - Bitboard reference[4096]; - int size = 0; - - for (Square s = SQ_A1; s <= SQ_H8; ++s) - { - // Board edges are not considered in the relevant occupancies - Bitboard edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s)); - - // Given a square 's', the mask is the bitboard of sliding attacks from - // 's' computed on an empty board. 
The index must be big enough to contain - // all the attacks for each possible subset of the mask and so is 2 power - // the number of 1s of the mask. Hence we deduce the size of the shift to - // apply to the 64 or 32 bits word to get the index. - Magic& m = magics[s][pt - BISHOP]; - m.mask = Bitboards::sliding_attack(pt, s, 0) & ~edges; -#ifndef USE_PEXT - m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask); -#endif - // Set the offset for the attacks table of the square. We have individual - // table sizes for each square with "Fancy Magic Bitboards". - m.attacks = s == SQ_A1 ? table : magics[s - 1][pt - BISHOP].attacks + size; - size = 0; - - // Use Carry-Rippler trick to enumerate all subsets of masks[s] and - // store the corresponding sliding attack bitboard in reference[]. - Bitboard b = 0; - do - { -#ifndef USE_PEXT - occupancy[size] = b; -#endif - reference[size] = Bitboards::sliding_attack(pt, s, b); - - if (HasPext) - m.attacks[pext(b, m.mask)] = reference[size]; - - size++; - b = (b - m.mask) & m.mask; - } while (b); - -#ifndef USE_PEXT - PRNG rng(seeds[Is64Bit][rank_of(s)]); - - // Find a magic for square 's' picking up an (almost) random number - // until we find the one that passes the verification test. - for (int i = 0; i < size;) - { - for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6;) - m.magic = rng.sparse_rand(); - - // A good magic must map every possible occupancy to an index that - // looks up the correct sliding attack in the attacks[s] database. - // Note that we build up the database for square 's' as a side - // effect of verifying the magic. Keep track of the attempt count - // and save it in epoch[], little speed-up trick to avoid resetting - // m.attacks[] after every failed attempt. 
- for (++cnt, i = 0; i < size; ++i) - { - unsigned idx = m.index(occupancy[i]); - - if (epoch[idx] < cnt) - { - epoch[idx] = cnt; - m.attacks[idx] = reference[i]; - } - else if (m.attacks[idx] != reference[i]) - break; - } - } -#endif - } -} -} - -} // namespace Stockfish diff --git a/src/bitboard.h b/src/bitboard.h deleted file mode 100644 index 7d36b0a62ed65ee7982e11f378c518b1a8616e2b..0000000000000000000000000000000000000000 --- a/src/bitboard.h +++ /dev/null @@ -1,458 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef BITBOARD_H_INCLUDED -#define BITBOARD_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "types.h" - -namespace Stockfish { - -namespace Bitboards { - -void init(); -std::string pretty(Bitboard b); - -} // namespace Stockfish::Bitboards - -constexpr Bitboard FileABB = 0x0101010101010101ULL; -constexpr Bitboard FileBBB = FileABB << 1; -constexpr Bitboard FileCBB = FileABB << 2; -constexpr Bitboard FileDBB = FileABB << 3; -constexpr Bitboard FileEBB = FileABB << 4; -constexpr Bitboard FileFBB = FileABB << 5; -constexpr Bitboard FileGBB = FileABB << 6; -constexpr Bitboard FileHBB = FileABB << 7; - -constexpr Bitboard Rank1BB = 0xFF; -constexpr Bitboard Rank2BB = Rank1BB << (8 * 1); -constexpr Bitboard Rank3BB = Rank1BB << (8 * 2); -constexpr Bitboard Rank4BB = Rank1BB << (8 * 3); -constexpr Bitboard Rank5BB = Rank1BB << (8 * 4); -constexpr Bitboard Rank6BB = Rank1BB << (8 * 5); -constexpr Bitboard Rank7BB = Rank1BB << (8 * 6); -constexpr Bitboard Rank8BB = Rank1BB << (8 * 7); - -extern uint8_t PopCnt16[1 << 16]; -extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; - -extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; -extern Bitboard LineBB[SQUARE_NB][SQUARE_NB]; -extern Bitboard RayPassBB[SQUARE_NB][SQUARE_NB]; - -// Magic holds all magic bitboards relevant data for a single square -struct Magic { - Bitboard mask; - Bitboard* attacks; -#ifndef USE_PEXT - Bitboard magic; - unsigned shift; -#endif - - // Compute the attack's index using the 'magic bitboards' approach - unsigned index(Bitboard occupied) const { - -#ifdef USE_PEXT - return unsigned(pext(occupied, mask)); -#else - if (Is64Bit) - return unsigned(((occupied & mask) * magic) >> shift); - - unsigned lo = unsigned(occupied) & unsigned(mask); - unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32); - return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift; -#endif - } - - Bitboard attacks_bb(Bitboard occupied) 
const { return attacks[index(occupied)]; } -}; - -extern Magic Magics[SQUARE_NB][2]; - -constexpr Bitboard square_bb(Square s) { - assert(is_ok(s)); - return 1ULL << s; -} - - -// Overloads of bitwise operators between a Bitboard and a Square for testing -// whether a given bit is set in a bitboard, and for setting and clearing bits. - -constexpr Bitboard operator&(Bitboard b, Square s) { return b & square_bb(s); } -constexpr Bitboard operator|(Bitboard b, Square s) { return b | square_bb(s); } -constexpr Bitboard operator^(Bitboard b, Square s) { return b ^ square_bb(s); } -constexpr Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); } -constexpr Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); } - -constexpr Bitboard operator&(Square s, Bitboard b) { return b & s; } -constexpr Bitboard operator|(Square s, Bitboard b) { return b | s; } -constexpr Bitboard operator^(Square s, Bitboard b) { return b ^ s; } - -constexpr Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; } - -constexpr bool more_than_one(Bitboard b) { return b & (b - 1); } - - -// rank_bb() and file_bb() return a bitboard representing all the squares on -// the given file or rank. - -constexpr Bitboard rank_bb(Rank r) { return Rank1BB << (8 * r); } - -constexpr Bitboard rank_bb(Square s) { return rank_bb(rank_of(s)); } - -constexpr Bitboard file_bb(File f) { return FileABB << f; } - -constexpr Bitboard file_bb(Square s) { return file_bb(file_of(s)); } - - -// Moves a bitboard one or two steps as specified by the direction D -template -constexpr Bitboard shift(Bitboard b) { - return D == NORTH ? b << 8 - : D == SOUTH ? b >> 8 - : D == NORTH + NORTH ? b << 16 - : D == SOUTH + SOUTH ? b >> 16 - : D == EAST ? (b & ~FileHBB) << 1 - : D == WEST ? (b & ~FileABB) >> 1 - : D == NORTH_EAST ? (b & ~FileHBB) << 9 - : D == NORTH_WEST ? (b & ~FileABB) << 7 - : D == SOUTH_EAST ? (b & ~FileHBB) >> 7 - : D == SOUTH_WEST ? 
(b & ~FileABB) >> 9 - : 0; -} - - -// Returns the squares attacked by pawns of the given color -// from the squares in the given bitboard. -template -constexpr Bitboard pawn_attacks_bb(Bitboard b) { - return C == WHITE ? shift(b) | shift(b) - : shift(b) | shift(b); -} - - -// Returns a bitboard representing an entire line (from board edge -// to board edge) that intersects the two given squares. If the given squares -// are not on a same file/rank/diagonal, the function returns 0. For instance, -// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal. -inline Bitboard line_bb(Square s1, Square s2) { - - assert(is_ok(s1) && is_ok(s2)); - return LineBB[s1][s2]; -} - - -// Returns a bitboard representing the squares in the semi-open -// segment between the squares s1 and s2 (excluding s1 but including s2). If the -// given squares are not on a same file/rank/diagonal, it returns s2. For instance, -// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but -// between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick -// allows to generate non-king evasion moves faster: the defending piece must either -// interpose itself to cover the check or capture the checking piece. -inline Bitboard between_bb(Square s1, Square s2) { - - assert(is_ok(s1) && is_ok(s2)); - return BetweenBB[s1][s2]; -} - -// distance() functions return the distance between x and y, defined as the -// number of steps for a king in x to reach y. 
- -template -inline int distance(Square x, Square y); - -template<> -inline int distance(Square x, Square y) { - return std::abs(file_of(x) - file_of(y)); -} - -template<> -inline int distance(Square x, Square y) { - return std::abs(rank_of(x) - rank_of(y)); -} - -template<> -inline int distance(Square x, Square y) { - return SquareDistance[x][y]; -} - -inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); } - - -constexpr int constexpr_popcount(Bitboard b) { - b = b - ((b >> 1) & 0x5555555555555555ULL); - b = (b & 0x3333333333333333ULL) + ((b >> 2) & 0x3333333333333333ULL); - b = (b + (b >> 4)) & 0x0F0F0F0F0F0F0F0FULL; - return static_cast((b * 0x0101010101010101ULL) >> 56); -} - -// Counts the number of non-zero bits in a bitboard. -inline int popcount(Bitboard b) { - -#ifndef USE_POPCNT - - std::uint16_t indices[4]; - std::memcpy(indices, &b, sizeof(b)); - return PopCnt16[indices[0]] + PopCnt16[indices[1]] + PopCnt16[indices[2]] - + PopCnt16[indices[3]]; - -#elif defined(_MSC_VER) - - return int(_mm_popcnt_u64(b)); - -#else // Assumed gcc or compatible compiler - - return __builtin_popcountll(b); - -#endif -} - -// Returns the least significant bit in a non-zero bitboard. -inline Square lsb(Bitboard b) { - assert(b); - -#if defined(__GNUC__) // GCC, Clang, ICX - - return Square(__builtin_ctzll(b)); - -#elif defined(_MSC_VER) - #ifdef _WIN64 // MSVC, WIN64 - - unsigned long idx; - _BitScanForward64(&idx, b); - return Square(idx); - - #else // MSVC, WIN32 - unsigned long idx; - - if (b & 0xffffffff) - { - _BitScanForward(&idx, int32_t(b)); - return Square(idx); - } - else - { - _BitScanForward(&idx, int32_t(b >> 32)); - return Square(idx + 32); - } - #endif -#else // Compiler is neither GCC nor MSVC compatible - #error "Compiler not supported." -#endif -} - -// Returns the most significant bit in a non-zero bitboard. 
-inline Square msb(Bitboard b) { - assert(b); - -#if defined(__GNUC__) // GCC, Clang, ICX - - return Square(63 ^ __builtin_clzll(b)); - -#elif defined(_MSC_VER) - #ifdef _WIN64 // MSVC, WIN64 - - unsigned long idx; - _BitScanReverse64(&idx, b); - return Square(idx); - - #else // MSVC, WIN32 - - unsigned long idx; - - if (b >> 32) - { - _BitScanReverse(&idx, int32_t(b >> 32)); - return Square(idx + 32); - } - else - { - _BitScanReverse(&idx, int32_t(b)); - return Square(idx); - } - #endif -#else // Compiler is neither GCC nor MSVC compatible - #error "Compiler not supported." -#endif -} - -// Returns the bitboard of the least significant -// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)). -inline Bitboard least_significant_square_bb(Bitboard b) { - assert(b); - return b & -b; -} - -// Finds and clears the least significant bit in a non-zero bitboard. -inline Square pop_lsb(Bitboard& b) { - assert(b); - const Square s = lsb(b); - b &= b - 1; - return s; -} - -namespace Bitboards { -// Returns the bitboard of target square for the given step -// from the given square. If the step is off the board, returns empty bitboard. -constexpr Bitboard safe_destination(Square s, int step) { - constexpr auto abs = [](int v) { return v < 0 ? -v : v; }; - Square to = Square(s + step); - return is_ok(to) && abs(file_of(s) - file_of(to)) <= 2 ? square_bb(to) : Bitboard(0); -} - -constexpr Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { - Bitboard attacks = 0; - Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; - Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST}; - - for (Direction d : (pt == ROOK ? 
RookDirections : BishopDirections)) - { - Square s = sq; - while (safe_destination(s, d)) - { - attacks |= (s += d); - if (occupied & s) - { - break; - } - } - } - - return attacks; -} - -constexpr Bitboard knight_attack(Square sq) { - Bitboard b = {}; - for (int step : {-17, -15, -10, -6, 6, 10, 15, 17}) - b |= safe_destination(sq, step); - return b; -} - -constexpr Bitboard king_attack(Square sq) { - Bitboard b = {}; - for (int step : {-9, -8, -7, -1, 1, 7, 8, 9}) - b |= safe_destination(sq, step); - return b; -} - -constexpr Bitboard pseudo_attacks(PieceType pt, Square sq) { - switch (pt) - { - case PieceType::ROOK : - case PieceType::BISHOP : - return sliding_attack(pt, sq, 0); - case PieceType::QUEEN : - return sliding_attack(PieceType::ROOK, sq, 0) | sliding_attack(PieceType::BISHOP, sq, 0); - case PieceType::KNIGHT : - return knight_attack(sq); - case PieceType::KING : - return king_attack(sq); - default : - assert(false); - return 0; - } -} - -} - -inline constexpr auto PseudoAttacks = []() constexpr { - std::array, PIECE_TYPE_NB> attacks{}; - - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - { - attacks[WHITE][s1] = pawn_attacks_bb(square_bb(s1)); - attacks[BLACK][s1] = pawn_attacks_bb(square_bb(s1)); - - attacks[KING][s1] = Bitboards::pseudo_attacks(KING, s1); - attacks[KNIGHT][s1] = Bitboards::pseudo_attacks(KNIGHT, s1); - attacks[QUEEN][s1] = attacks[BISHOP][s1] = Bitboards::pseudo_attacks(BISHOP, s1); - attacks[QUEEN][s1] |= attacks[ROOK][s1] = Bitboards::pseudo_attacks(ROOK, s1); - } - - return attacks; -}(); - - -// Returns the pseudo attacks of the given piece type -// assuming an empty board. -template -inline Bitboard attacks_bb(Square s, Color c = COLOR_NB) { - - assert((Pt != PAWN || c < COLOR_NB) && is_ok(s)); - return Pt == PAWN ? PseudoAttacks[c][s] : PseudoAttacks[Pt][s]; -} - - -// Returns the attacks by the given piece -// assuming the board is occupied according to the passed Bitboard. 
-// Sliding piece attacks do not continue passed an occupied square. -template -inline Bitboard attacks_bb(Square s, Bitboard occupied) { - - assert(Pt != PAWN && is_ok(s)); - - switch (Pt) - { - case BISHOP : - case ROOK : - return Magics[s][Pt - BISHOP].attacks_bb(occupied); - case QUEEN : - return attacks_bb(s, occupied) | attacks_bb(s, occupied); - default : - return PseudoAttacks[Pt][s]; - } -} - -// Returns the attacks by the given piece -// assuming the board is occupied according to the passed Bitboard. -// Sliding piece attacks do not continue passed an occupied square. -inline Bitboard attacks_bb(PieceType pt, Square s, Bitboard occupied) { - - assert(pt != PAWN && is_ok(s)); - - switch (pt) - { - case BISHOP : - return attacks_bb(s, occupied); - case ROOK : - return attacks_bb(s, occupied); - case QUEEN : - return attacks_bb(s, occupied) | attacks_bb(s, occupied); - default : - return PseudoAttacks[pt][s]; - } -} - -inline Bitboard attacks_bb(Piece pc, Square s, Bitboard occupied) { - return type_of(pc) == PAWN ? PseudoAttacks[color_of(pc)][s] - : attacks_bb(type_of(pc), s, occupied); -} - -} // namespace Stockfish - -#endif // #ifndef BITBOARD_H_INCLUDED diff --git a/src/engine.cpp b/src/engine.cpp deleted file mode 100644 index be0fe3c4086e9aeede5707a97503679ef803d947..0000000000000000000000000000000000000000 --- a/src/engine.cpp +++ /dev/null @@ -1,411 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "engine.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "evaluate.h" -#include "misc.h" -#include "nnue/network.h" -#include "nnue/nnue_common.h" -#include "nnue/nnue_misc.h" -#include "numa.h" -#include "perft.h" -#include "position.h" -#include "search.h" -#include "shm.h" -#include "syzygy/tbprobe.h" -#include "types.h" -#include "uci.h" -#include "ucioption.h" - -namespace Stockfish { - -namespace NN = Eval::NNUE; - -constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; -constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; -int MaxThreads = std::max(1024, 4 * int(get_hardware_concurrency())); - -// The default configuration will attempt to group L3 domains up to 32 threads. -// This size was found to be a good balance between the Elo gain of increased -// history sharing and the speed loss from more cross-cache accesses (see -// PR#6526). The user can always explicitly override this behavior. -constexpr NumaAutoPolicy DefaultNumaPolicy = BundledL3Policy{32}; - -Engine::Engine(std::optional path) : - binaryDirectory(path ? 
CommandLine::get_binary_directory(*path) : ""), - numaContext(NumaConfig::from_system(DefaultNumaPolicy)), - states(new std::deque(1)), - threads(), - networks(numaContext, get_default_networks()) { - - pos.set(StartFEN, false, &states->back()); - - options.add( // - "Debug Log File", Option("", [](const Option& o) { - start_logger(o); - return std::nullopt; - })); - - options.add( // - "NumaPolicy", Option("auto", [this](const Option& o) { - set_numa_config_from_option(o); - return numa_config_information_as_string() + "\n" - + thread_allocation_information_as_string(); - })); - - options.add( // - "Threads", Option(1, 1, MaxThreads, [this](const Option&) { - resize_threads(); - return thread_allocation_information_as_string(); - })); - - options.add( // - "Hash", Option(16, 1, MaxHashMB, [this](const Option& o) { - set_tt_size(o); - return std::nullopt; - })); - - options.add( // - "Clear Hash", Option([this](const Option&) { - search_clear(); - return std::nullopt; - })); - - options.add( // - "Ponder", Option(false)); - - options.add( // - "MultiPV", Option(1, 1, MAX_MOVES)); - - options.add("Skill Level", Option(20, 0, 20)); - - options.add("Move Overhead", Option(10, 0, 5000)); - - options.add("nodestime", Option(0, 0, 10000)); - - options.add("UCI_Chess960", Option(false)); - - options.add("UCI_LimitStrength", Option(false)); - - options.add("UCI_Elo", - Option(Stockfish::Search::Skill::LowestElo, Stockfish::Search::Skill::LowestElo, - Stockfish::Search::Skill::HighestElo)); - - options.add("UCI_ShowWDL", Option(false)); - - options.add( // - "SyzygyPath", Option("", [](const Option& o) { - Tablebases::init(o); - return std::nullopt; - })); - - options.add("SyzygyProbeDepth", Option(1, 1, 100)); - - options.add("Syzygy50MoveRule", Option(true)); - - options.add("SyzygyProbeLimit", Option(7, 0, 7)); - - options.add( // - "EvalFile", Option(EvalFileDefaultNameBig, [this](const Option& o) { - load_big_network(o); - return std::nullopt; - })); - - options.add( 
// - "EvalFileSmall", Option(EvalFileDefaultNameSmall, [this](const Option& o) { - load_small_network(o); - return std::nullopt; - })); - - threads.clear(); - threads.ensure_network_replicated(); - resize_threads(); -} - -std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) { - verify_networks(); - - return Benchmark::perft(fen, depth, isChess960); -} - -void Engine::go(Search::LimitsType& limits) { - assert(limits.perft == 0); - verify_networks(); - - threads.start_thinking(options, pos, states, limits); -} -void Engine::stop() { threads.stop = true; } - -void Engine::search_clear() { - wait_for_search_finished(); - - tt.clear(threads); - threads.clear(); - - // @TODO wont work with multiple instances - Tablebases::init(options["SyzygyPath"]); // Free mapped files -} - -void Engine::set_on_update_no_moves(std::function&& f) { - updateContext.onUpdateNoMoves = std::move(f); -} - -void Engine::set_on_update_full(std::function&& f) { - updateContext.onUpdateFull = std::move(f); -} - -void Engine::set_on_iter(std::function&& f) { - updateContext.onIter = std::move(f); -} - -void Engine::set_on_bestmove(std::function&& f) { - updateContext.onBestmove = std::move(f); -} - -void Engine::set_on_verify_networks(std::function&& f) { - onVerifyNetworks = std::move(f); -} - -void Engine::wait_for_search_finished() { threads.main_thread()->wait_for_search_finished(); } - -void Engine::set_position(const std::string& fen, const std::vector& moves) { - // Drop the old state and create a new one - states = StateListPtr(new std::deque(1)); - pos.set(fen, options["UCI_Chess960"], &states->back()); - - for (const auto& move : moves) - { - auto m = UCIEngine::to_move(pos, move); - - if (m == Move::none()) - break; - - states->emplace_back(); - pos.do_move(m, states->back()); - } -} - -// modifiers - -void Engine::set_numa_config_from_option(const std::string& o) { - if (o == "auto" || o == "system") - { - 
numaContext.set_numa_config(NumaConfig::from_system(DefaultNumaPolicy)); - } - else if (o == "hardware") - { - // Don't respect affinity set in the system. - numaContext.set_numa_config(NumaConfig::from_system(DefaultNumaPolicy, false)); - } - else if (o == "none") - { - numaContext.set_numa_config(NumaConfig{}); - } - else - { - numaContext.set_numa_config(NumaConfig::from_string(o)); - } - - // Force reallocation of threads in case affinities need to change. - resize_threads(); - threads.ensure_network_replicated(); -} - -void Engine::resize_threads() { - threads.wait_for_search_finished(); - threads.set(numaContext.get_numa_config(), {options, threads, tt, sharedHists, networks}, - updateContext); - - // Reallocate the hash with the new threadpool size - set_tt_size(options["Hash"]); - threads.ensure_network_replicated(); -} - -void Engine::set_tt_size(size_t mb) { - wait_for_search_finished(); - tt.resize(mb, threads); -} - -void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; } - -// network related - -void Engine::verify_networks() const { - networks->big.verify(options["EvalFile"], onVerifyNetworks); - networks->small.verify(options["EvalFileSmall"], onVerifyNetworks); - - auto statuses = networks.get_status_and_errors(); - for (size_t i = 0; i < statuses.size(); ++i) - { - const auto [status, error] = statuses[i]; - std::string message = "Network replica " + std::to_string(i + 1) + ": "; - if (status == SystemWideSharedConstantAllocationStatus::NoAllocation) - { - message += "No allocation."; - } - else if (status == SystemWideSharedConstantAllocationStatus::LocalMemory) - { - message += "Local memory."; - } - else if (status == SystemWideSharedConstantAllocationStatus::SharedMemory) - { - message += "Shared memory."; - } - else - { - message += "Unknown status."; - } - - if (error.has_value()) - { - message += " " + *error; - } - - onVerifyNetworks(message); - } -} - -std::unique_ptr Engine::get_default_networks() const { - - auto 
networks_ = - std::make_unique(NN::EvalFile{EvalFileDefaultNameBig, "None", ""}, - NN::EvalFile{EvalFileDefaultNameSmall, "None", ""}); - - networks_->big.load(binaryDirectory, ""); - networks_->small.load(binaryDirectory, ""); - - return networks_; -} - -void Engine::load_big_network(const std::string& file) { - networks.modify_and_replicate( - [this, &file](NN::Networks& networks_) { networks_.big.load(binaryDirectory, file); }); - threads.clear(); - threads.ensure_network_replicated(); -} - -void Engine::load_small_network(const std::string& file) { - networks.modify_and_replicate( - [this, &file](NN::Networks& networks_) { networks_.small.load(binaryDirectory, file); }); - threads.clear(); - threads.ensure_network_replicated(); -} - -void Engine::save_network(const std::pair, std::string> files[2]) { - networks.modify_and_replicate([&files](NN::Networks& networks_) { - networks_.big.save(files[0].first); - networks_.small.save(files[1].first); - }); -} - -// utility functions - -void Engine::trace_eval() const { - StateListPtr trace_states(new std::deque(1)); - Position p; - p.set(pos.fen(), options["UCI_Chess960"], &trace_states->back()); - - verify_networks(); - - sync_cout << "\n" << Eval::trace(p, *networks) << sync_endl; -} - -const OptionsMap& Engine::get_options() const { return options; } -OptionsMap& Engine::get_options() { return options; } - -std::string Engine::fen() const { return pos.fen(); } - -void Engine::flip() { pos.flip(); } - -std::string Engine::visualize() const { - std::stringstream ss; - ss << pos; - return ss.str(); -} - -int Engine::get_hashfull(int maxAge) const { return tt.hashfull(maxAge); } - -std::vector> Engine::get_bound_thread_count_by_numa_node() const { - auto counts = threads.get_bound_thread_count_by_numa_node(); - const NumaConfig& cfg = numaContext.get_numa_config(); - std::vector> ratios; - NumaIndex n = 0; - for (; n < counts.size(); ++n) - ratios.emplace_back(counts[n], cfg.num_cpus_in_numa_node(n)); - if 
(!counts.empty()) - for (; n < cfg.num_numa_nodes(); ++n) - ratios.emplace_back(0, cfg.num_cpus_in_numa_node(n)); - return ratios; -} - -std::string Engine::get_numa_config_as_string() const { - return numaContext.get_numa_config().to_string(); -} - -std::string Engine::numa_config_information_as_string() const { - auto cfgStr = get_numa_config_as_string(); - return "Available processors: " + cfgStr; -} - -std::string Engine::thread_binding_information_as_string() const { - auto boundThreadsByNode = get_bound_thread_count_by_numa_node(); - std::stringstream ss; - if (boundThreadsByNode.empty()) - return ss.str(); - - bool isFirst = true; - - for (auto&& [current, total] : boundThreadsByNode) - { - if (!isFirst) - ss << ":"; - ss << current << "/" << total; - isFirst = false; - } - - return ss.str(); -} - -std::string Engine::thread_allocation_information_as_string() const { - std::stringstream ss; - - size_t threadsSize = threads.size(); - ss << "Using " << threadsSize << (threadsSize > 1 ? " threads" : " thread"); - - auto boundThreadsByNodeStr = thread_binding_information_as_string(); - if (boundThreadsByNodeStr.empty()) - return ss.str(); - - ss << " with NUMA node thread binding: "; - ss << boundThreadsByNodeStr; - - return ss.str(); -} -} diff --git a/src/engine.h b/src/engine.h deleted file mode 100644 index 92d6282dcd6fb56dffd59279e1bd1ee4be283b86..0000000000000000000000000000000000000000 --- a/src/engine.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef ENGINE_H_INCLUDED -#define ENGINE_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "history.h" -#include "nnue/network.h" -#include "numa.h" -#include "position.h" -#include "search.h" -#include "syzygy/tbprobe.h" // for Stockfish::Depth -#include "thread.h" -#include "tt.h" -#include "ucioption.h" - -namespace Stockfish { - -class Engine { - public: - using InfoShort = Search::InfoShort; - using InfoFull = Search::InfoFull; - using InfoIter = Search::InfoIteration; - - Engine(std::optional path = std::nullopt); - - // Cannot be movable due to components holding backreferences to fields - Engine(const Engine&) = delete; - Engine(Engine&&) = delete; - Engine& operator=(const Engine&) = delete; - Engine& operator=(Engine&&) = delete; - - ~Engine() { wait_for_search_finished(); } - - std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960); - - // non blocking call to start searching - void go(Search::LimitsType&); - // non blocking call to stop searching - void stop(); - - // blocking call to wait for search to finish - void wait_for_search_finished(); - // set a new position, moves are in UCI format - void set_position(const std::string& fen, const std::vector& moves); - - // modifiers - - void set_numa_config_from_option(const std::string& o); - void resize_threads(); - void set_tt_size(size_t mb); - void set_ponderhit(bool); - void search_clear(); - - void set_on_update_no_moves(std::function&&); - void set_on_update_full(std::function&&); - void set_on_iter(std::function&&); - void 
set_on_bestmove(std::function&&); - void set_on_verify_networks(std::function&&); - - // network related - - void verify_networks() const; - std::unique_ptr get_default_networks() const; - void load_big_network(const std::string& file); - void load_small_network(const std::string& file); - void save_network(const std::pair, std::string> files[2]); - - // utility functions - - void trace_eval() const; - - const OptionsMap& get_options() const; - OptionsMap& get_options(); - - int get_hashfull(int maxAge = 0) const; - - std::string fen() const; - void flip(); - std::string visualize() const; - std::vector> get_bound_thread_count_by_numa_node() const; - std::string get_numa_config_as_string() const; - std::string numa_config_information_as_string() const; - std::string thread_allocation_information_as_string() const; - std::string thread_binding_information_as_string() const; - - private: - const std::string binaryDirectory; - - NumaReplicationContext numaContext; - - Position pos; - StateListPtr states; - - OptionsMap options; - ThreadPool threads; - TranspositionTable tt; - LazyNumaReplicatedSystemWide networks; - - Search::SearchManager::UpdateContext updateContext; - std::function onVerifyNetworks; - std::map sharedHists; -}; - -} // namespace Stockfish - - -#endif // #ifndef ENGINE_H_INCLUDED diff --git a/src/evaluate.cpp b/src/evaluate.cpp deleted file mode 100644 index 745bd3e4d56f114737a726f967f5942727f3fe9b..0000000000000000000000000000000000000000 --- a/src/evaluate.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "evaluate.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "nnue/network.h" -#include "nnue/nnue_misc.h" -#include "position.h" -#include "types.h" -#include "uci.h" -#include "nnue/nnue_accumulator.h" - -namespace Stockfish { - -// Returns a static, purely materialistic evaluation of the position from -// the point of view of the side to move. It can be divided by PawnValue to get -// an approximation of the material advantage on the board in terms of pawns. -int Eval::simple_eval(const Position& pos) { - Color c = pos.side_to_move(); - return PawnValue * (pos.count(c) - pos.count(~c)) + pos.non_pawn_material(c) - - pos.non_pawn_material(~c); -} - -bool Eval::use_smallnet(const Position& pos) { return std::abs(simple_eval(pos)) > 962; } - -// Evaluate is the evaluator for the outer world. It returns a static evaluation -// of the position from the point of view of the side to move. -Value Eval::evaluate(const Eval::NNUE::Networks& networks, - const Position& pos, - Eval::NNUE::AccumulatorStack& accumulators, - Eval::NNUE::AccumulatorCaches& caches, - int optimism) { - - assert(!pos.checkers()); - - bool smallNet = use_smallnet(pos); - auto [psqt, positional] = smallNet ? 
networks.small.evaluate(pos, accumulators, caches.small) - : networks.big.evaluate(pos, accumulators, caches.big); - - Value nnue = (125 * psqt + 131 * positional) / 128; - - // Re-evaluate the position when higher eval accuracy is worth the time spent - if (smallNet && (std::abs(nnue) < 277)) - { - std::tie(psqt, positional) = networks.big.evaluate(pos, accumulators, caches.big); - nnue = (125 * psqt + 131 * positional) / 128; - smallNet = false; - } - - // Blend optimism and eval with nnue complexity - int nnueComplexity = std::abs(psqt - positional); - optimism += optimism * nnueComplexity / 476; - nnue -= nnue * nnueComplexity / 18236; - - int material = 534 * pos.count() + pos.non_pawn_material(); - int v = (nnue * (77871 + material) + optimism * (7191 + material)) / 77871; - - // Damp down the evaluation linearly when shuffling - v -= v * pos.rule50_count() / 199; - - // Guarantee evaluation does not hit the tablebase range - v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); - - return v; -} - -// Like evaluate(), but instead of returning a value, it returns -// a string (suitable for outputting to stdout) that contains the detailed -// descriptions and values of each evaluation term. Useful for debugging. -// Trace scores are from white's point of view -std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) { - - if (pos.checkers()) - return "Final evaluation: none (in check)"; - - auto accumulators = std::make_unique(); - auto caches = std::make_unique(networks); - - std::stringstream ss; - ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2); - ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n'; - - ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); - - auto [psqt, positional] = networks.big.evaluate(pos, *accumulators, caches->big); - Value v = psqt + positional; - v = pos.side_to_move() == WHITE ? 
v : -v; - ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n"; - - v = evaluate(networks, pos, *accumulators, *caches, VALUE_ZERO); - v = pos.side_to_move() == WHITE ? v : -v; - ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)"; - ss << " [with scaled NNUE, ...]"; - ss << "\n"; - - return ss.str(); -} - -} // namespace Stockfish diff --git a/src/evaluate.h b/src/evaluate.h deleted file mode 100644 index 4af7093e0ac7737546bbac45fff2bb0bfd7811c8..0000000000000000000000000000000000000000 --- a/src/evaluate.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef EVALUATE_H_INCLUDED -#define EVALUATE_H_INCLUDED - -#include - -#include "types.h" - -namespace Stockfish { - -class Position; - -namespace Eval { - -// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue -// for the build process (profile-build and fishtest) to work. Do not change the -// name of the macro or the location where this macro is defined, as it is used -// in the Makefile/Fishtest. 
-#define EvalFileDefaultNameBig "nn-9a0cc2a62c52.nnue" -#define EvalFileDefaultNameSmall "nn-47fc8b7fff06.nnue" - -namespace NNUE { -struct Networks; -struct AccumulatorCaches; -class AccumulatorStack; -} - -std::string trace(Position& pos, const Eval::NNUE::Networks& networks); - -int simple_eval(const Position& pos); -bool use_smallnet(const Position& pos); -Value evaluate(const NNUE::Networks& networks, - const Position& pos, - Eval::NNUE::AccumulatorStack& accumulators, - Eval::NNUE::AccumulatorCaches& caches, - int optimism); -} // namespace Eval - -} // namespace Stockfish - -#endif // #ifndef EVALUATE_H_INCLUDED diff --git a/src/history.h b/src/history.h deleted file mode 100644 index c98a7ee223b108fabfe066a6d5dcba6ab17441c6..0000000000000000000000000000000000000000 --- a/src/history.h +++ /dev/null @@ -1,273 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef HISTORY_H_INCLUDED -#define HISTORY_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include -#include -#include // IWYU pragma: keep - -#include "memory.h" -#include "misc.h" -#include "position.h" - -namespace Stockfish { - -constexpr int PAWN_HISTORY_BASE_SIZE = 8192; // has to be a power of 2 -constexpr int UINT_16_HISTORY_SIZE = std::numeric_limits::max() + 1; -constexpr int CORRHIST_BASE_SIZE = UINT_16_HISTORY_SIZE; -constexpr int CORRECTION_HISTORY_LIMIT = 1024; -constexpr int LOW_PLY_HISTORY_SIZE = 5; - -static_assert((PAWN_HISTORY_BASE_SIZE & (PAWN_HISTORY_BASE_SIZE - 1)) == 0, - "PAWN_HISTORY_BASE_SIZE has to be a power of 2"); - -static_assert((CORRHIST_BASE_SIZE & (CORRHIST_BASE_SIZE - 1)) == 0, - "CORRHIST_BASE_SIZE has to be a power of 2"); - -// StatsEntry is the container of various numerical statistics. We use a class -// instead of a naked value to directly call history update operator<<() on -// the entry. The first template parameter T is the base type of the array, -// and the second template parameter D limits the range of updates in [-D, D] -// when we update values with the << operator -template -struct StatsEntry { - static_assert(std::is_arithmetic_v, "Not an arithmetic type"); - - private: - std::conditional_t, T> entry; - - public: - void operator=(const T& v) { - if constexpr (Atomic) - entry.store(v, std::memory_order_relaxed); - else - entry = v; - } - - operator T() const { - if constexpr (Atomic) - return entry.load(std::memory_order_relaxed); - else - return entry; - } - - void operator<<(int bonus) { - // Make sure that bonus is in range [-D, D] - int clampedBonus = std::clamp(bonus, -D, D); - T val = *this; - *this = val + clampedBonus - val * std::abs(clampedBonus) / D; - - assert(std::abs(T(*this)) <= D); - } -}; - -enum StatsType { - NoCaptures, - Captures -}; - -template -using Stats = MultiArray, Sizes...>; - -template -using AtomicStats = MultiArray, Sizes...>; - -// DynStats is a 
dynamically sized array of Stats, used for thread-shared histories -// which should scale with the total number of threads. The SizeMultiplier gives -// the per-thread allocation count of T. -template -struct DynStats { - explicit DynStats(size_t s) { - size = s * SizeMultiplier; - data = make_unique_large_page(size); - } - // Fills the slice of entries selected by threadIdx (1 of numaTotal slices) with value - void clear_range(int value, size_t threadIdx, size_t numaTotal) { - size_t start = uint64_t(threadIdx) * size / numaTotal; - assert(start < size); - size_t end = threadIdx + 1 == numaTotal ? size : uint64_t(threadIdx + 1) * size / numaTotal; - - while (start < end) - data[start++].fill(value); - } - size_t get_size() const { return size; } - T& operator[](size_t index) { - assert(index < size); - return data.get()[index]; - } - const T& operator[](size_t index) const { - assert(index < size); - return data.get()[index]; - } - - private: - size_t size; - LargePagePtr data; -}; - -// ButterflyHistory records how often quiet moves have been successful or unsuccessful -// during the current search, and is used for reduction and move ordering decisions. -// It uses 2 tables (one for each color) indexed by the move's from and to squares, -// see https://www.chessprogramming.org/Butterfly_Boards -using ButterflyHistory = Stats; - -// LowPlyHistory is addressed by ply and move's from and to squares, used -// to improve move ordering near the root -using LowPlyHistory = Stats; - -// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] -using CapturePieceToHistory = Stats; - -// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to] -using PieceToHistory = Stats; - -// ContinuationHistory is the combined history of a given pair of moves, usually -// the current one given a previous one. The nested history table is based on -// PieceToHistory instead of ButterflyBoards.
-using ContinuationHistory = MultiArray; - -// PawnHistory is addressed by the pawn structure and a move's [piece][to] -using PawnHistory = - DynStats, PAWN_HISTORY_BASE_SIZE>; - -// Correction histories record differences between the static evaluation of -// positions and their search score. It is used to improve the static evaluation -// used by some search heuristics. -// see https://www.chessprogramming.org/Static_Evaluation_Correction_History -enum CorrHistType { - Pawn, // By color and pawn structure - Minor, // By color and positions of minor pieces (Knight, Bishop) - NonPawn, // By non-pawn material positions and color - PieceTo, // By [piece][to] move - Continuation, // Combined history of move pairs -}; - -template -struct CorrectionBundle { - StatsEntry pawn; - StatsEntry minor; - StatsEntry nonPawnWhite; - StatsEntry nonPawnBlack; - - void operator=(T val) { - pawn = val; - minor = val; - nonPawnWhite = val; - nonPawnBlack = val; - } -}; - -namespace Detail { - -template -struct CorrHistTypedef { - using type = - DynStats, CORRHIST_BASE_SIZE>; -}; - -template<> -struct CorrHistTypedef { - using type = Stats; -}; - -template<> -struct CorrHistTypedef { - using type = MultiArray::type, PIECE_NB, SQUARE_NB>; -}; - -template<> -struct CorrHistTypedef { - using type = DynStats, - CORRHIST_BASE_SIZE>; -}; - -} - -using UnifiedCorrectionHistory = - DynStats, COLOR_NB>, - CORRHIST_BASE_SIZE>; - -template -using CorrectionHistory = typename Detail::CorrHistTypedef::type; - -using TTMoveHistory = StatsEntry; - -// Set of histories shared between groups of threads. To avoid excessive -// cross-node data transfer, histories are shared only between threads -// on a given NUMA node. The passed size must be a power of two to make -// the indexing more efficient. 
-struct SharedHistories { - SharedHistories(size_t threadCount) : - correctionHistory(threadCount), - pawnHistory(threadCount) { - assert((threadCount & (threadCount - 1)) == 0 && threadCount != 0); - sizeMinus1 = correctionHistory.get_size() - 1; - pawnHistSizeMinus1 = pawnHistory.get_size() - 1; - } - - size_t get_size() const { return sizeMinus1 + 1; } - - auto& pawn_entry(const Position& pos) { - return pawnHistory[pos.pawn_key() & pawnHistSizeMinus1]; - } - const auto& pawn_entry(const Position& pos) const { - return pawnHistory[pos.pawn_key() & pawnHistSizeMinus1]; - } - - auto& pawn_correction_entry(const Position& pos) { - return correctionHistory[pos.pawn_key() & sizeMinus1]; - } - const auto& pawn_correction_entry(const Position& pos) const { - return correctionHistory[pos.pawn_key() & sizeMinus1]; - } - - auto& minor_piece_correction_entry(const Position& pos) { - return correctionHistory[pos.minor_piece_key() & sizeMinus1]; - } - const auto& minor_piece_correction_entry(const Position& pos) const { - return correctionHistory[pos.minor_piece_key() & sizeMinus1]; - } - - template - auto& nonpawn_correction_entry(const Position& pos) { - return correctionHistory[pos.non_pawn_key(c) & sizeMinus1]; - } - template - const auto& nonpawn_correction_entry(const Position& pos) const { - return correctionHistory[pos.non_pawn_key(c) & sizeMinus1]; - } - - UnifiedCorrectionHistory correctionHistory; - PawnHistory pawnHistory; - - - private: - size_t sizeMinus1, pawnHistSizeMinus1; -}; - -} // namespace Stockfish - -#endif // #ifndef HISTORY_H_INCLUDED diff --git a/src/incbin/UNLICENCE b/src/incbin/UNLICENCE deleted file mode 100644 index 32484ab5e7026f9a1f15c2f8c08b1418802e02a8..0000000000000000000000000000000000000000 --- a/src/incbin/UNLICENCE +++ /dev/null @@ -1,26 +0,0 @@ -The file "incbin.h" is free and unencumbered software released into -the public domain by Dale Weiler, see: - - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute 
this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h deleted file mode 100644 index 3f662e15dad1864dc6031c6491a21a6815994f1a..0000000000000000000000000000000000000000 --- a/src/incbin/incbin.h +++ /dev/null @@ -1,476 +0,0 @@ -/** - * @file incbin.h - * @author Dale Weiler - * @brief Utility for including binary files - * - * Facilities for including binary files into the current translation unit and - * making use from them externally in other translation units. 
- */ -#ifndef INCBIN_HDR -#define INCBIN_HDR -#include -#if defined(__AVX512BW__) || \ - defined(__AVX512CD__) || \ - defined(__AVX512DQ__) || \ - defined(__AVX512ER__) || \ - defined(__AVX512PF__) || \ - defined(__AVX512VL__) || \ - defined(__AVX512F__) -# define INCBIN_ALIGNMENT_INDEX 6 -#elif defined(__AVX__) || \ - defined(__AVX2__) -# define INCBIN_ALIGNMENT_INDEX 5 -#elif defined(__SSE__) || \ - defined(__SSE2__) || \ - defined(__SSE3__) || \ - defined(__SSSE3__) || \ - defined(__SSE4_1__) || \ - defined(__SSE4_2__) || \ - defined(__neon__) || \ - defined(__ARM_NEON) || \ - defined(__ALTIVEC__) -# define INCBIN_ALIGNMENT_INDEX 4 -#elif ULONG_MAX != 0xffffffffu -# define INCBIN_ALIGNMENT_INDEX 3 -# else -# define INCBIN_ALIGNMENT_INDEX 2 -#endif - -/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ -#define INCBIN_ALIGN_SHIFT_0 1 -#define INCBIN_ALIGN_SHIFT_1 2 -#define INCBIN_ALIGN_SHIFT_2 4 -#define INCBIN_ALIGN_SHIFT_3 8 -#define INCBIN_ALIGN_SHIFT_4 16 -#define INCBIN_ALIGN_SHIFT_5 32 -#define INCBIN_ALIGN_SHIFT_6 64 - -/* Actual alignment value */ -#define INCBIN_ALIGNMENT \ - INCBIN_CONCATENATE( \ - INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ - INCBIN_ALIGNMENT_INDEX) - -/* Stringize */ -#define INCBIN_STR(X) \ - #X -#define INCBIN_STRINGIZE(X) \ - INCBIN_STR(X) -/* Concatenate */ -#define INCBIN_CAT(X, Y) \ - X ## Y -#define INCBIN_CONCATENATE(X, Y) \ - INCBIN_CAT(X, Y) -/* Deferred macro expansion */ -#define INCBIN_EVAL(X) \ - X -#define INCBIN_INVOKE(N, ...) \ - INCBIN_EVAL(N(__VA_ARGS__)) -/* Variable argument count for overloading by arity */ -#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N -#define INCBIN_VA_ARGC(...) 
INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0) - -/* Green Hills uses a different directive for including binary data */ -#if defined(__ghs__) -# if (__ghs_asm == 2) -# define INCBIN_MACRO ".file" -/* Or consider the ".myrawdata" entry in the ld file */ -# else -# define INCBIN_MACRO "\tINCBIN" -# endif -#else -# define INCBIN_MACRO ".incbin" -#endif - -#ifndef _MSC_VER -# define INCBIN_ALIGN \ - __attribute__((aligned(INCBIN_ALIGNMENT))) -#else -# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) -#endif - -#if defined(__arm__) || /* GNU C and RealView */ \ - defined(__arm) || /* Diab */ \ - defined(_ARM) /* ImageCraft */ -# define INCBIN_ARM -#endif - -#ifdef __GNUC__ -/* Utilize .balign where supported */ -# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" -# define INCBIN_ALIGN_BYTE ".balign 1\n" -#elif defined(INCBIN_ARM) -/* - * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is - * the shift count. This is the value passed to `.align' - */ -# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" -# define INCBIN_ALIGN_BYTE ".align 0\n" -#else -/* We assume other inline assembler's treat `.align' as `.balign' */ -# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" -# define INCBIN_ALIGN_BYTE ".align 1\n" -#endif - -/* INCBIN_CONST is used by incbin.c generated files */ -#if defined(__cplusplus) -# define INCBIN_EXTERNAL extern "C" -# define INCBIN_CONST extern const -#else -# define INCBIN_EXTERNAL extern -# define INCBIN_CONST const -#endif - -/** - * @brief Optionally override the linker section into which size and data is - * emitted. - * - * @warning If you use this facility, you might have to deal with - * platform-specific linker output section naming on your own. 
- */ -#if !defined(INCBIN_OUTPUT_SECTION) -# if defined(__APPLE__) -# define INCBIN_OUTPUT_SECTION ".const_data" -# else -# define INCBIN_OUTPUT_SECTION ".rodata" -# endif -#endif - -/** - * @brief Optionally override the linker section into which data is emitted. - * - * @warning If you use this facility, you might have to deal with - * platform-specific linker output section naming on your own. - */ -#if !defined(INCBIN_OUTPUT_DATA_SECTION) -# define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION -#endif - -/** - * @brief Optionally override the linker section into which size is emitted. - * - * @warning If you use this facility, you might have to deal with - * platform-specific linker output section naming on your own. - * - * @note This is useful for Harvard architectures where program memory cannot - * be directly read from the program without special instructions. With this you - * can choose to put the size variable in RAM rather than ROM. - */ -#if !defined(INCBIN_OUTPUT_SIZE_SECTION) -# define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION -#endif - -#if defined(__APPLE__) -# include "TargetConditionals.h" -# if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING) -# warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." -# endif -/* The directives are different for Apple branded compilers */ -# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" -# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" -# define INCBIN_INT ".long " -# define INCBIN_MANGLE "_" -# define INCBIN_BYTE ".byte " -# define INCBIN_TYPE(...)
-#else -# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n" -# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" -# if defined(__ghs__) -# define INCBIN_INT ".word " -# else -# define INCBIN_INT ".int " -# endif -# if defined(__USER_LABEL_PREFIX__) -# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) -# else -# define INCBIN_MANGLE "" -# endif -# if defined(INCBIN_ARM) -/* On arm assemblers, `@' is used as a line comment token */ -# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" -# elif defined(__MINGW32__) || defined(__MINGW64__) -/* Mingw doesn't support this directive either */ -# define INCBIN_TYPE(NAME) -# else -/* It's safe to use `@' on other architectures */ -# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" -# endif -# define INCBIN_BYTE ".byte " -#endif - -/* List of style types used for symbol names */ -#define INCBIN_STYLE_CAMEL 0 -#define INCBIN_STYLE_SNAKE 1 - -/** - * @brief Specify the prefix to use for symbol names. - * - * @note By default this is "g". - * - * @code - * #define INCBIN_PREFIX incbin - * #include "incbin.h" - * INCBIN(Foo, "foo.txt"); - * - * // Now you have the following symbols instead: - * // const unsigned char incbinFoo[]; - * // const unsigned char *const incbinFoo; - * // const unsigned int incbinFoo; - * @endcode - */ -#if !defined(INCBIN_PREFIX) -# define INCBIN_PREFIX g -#endif - -/** - * @brief Specify the style used for symbol names. 
- * - * Possible options are - * - INCBIN_STYLE_CAMEL "CamelCase" - * - INCBIN_STYLE_SNAKE "snake_case" - * - * @note By default this is INCBIN_STYLE_CAMEL - * - * @code - * #define INCBIN_STYLE INCBIN_STYLE_SNAKE - * #include "incbin.h" - * INCBIN(foo, "foo.txt"); - * - * // Now you have the following symbols: - * // const unsigned char foo_data[]; - * // const unsigned char *const foo_end; - * // const unsigned int foo_size; - * @endcode - */ -#if !defined(INCBIN_STYLE) -# define INCBIN_STYLE INCBIN_STYLE_CAMEL -#endif - -/* Style lookup tables */ -#define INCBIN_STYLE_0_DATA Data -#define INCBIN_STYLE_0_END End -#define INCBIN_STYLE_0_SIZE Size -#define INCBIN_STYLE_1_DATA _data -#define INCBIN_STYLE_1_END _end -#define INCBIN_STYLE_1_SIZE _size - -/* Style lookup: returning identifier */ -#define INCBIN_STYLE_IDENT(TYPE) \ - INCBIN_CONCATENATE( \ - INCBIN_STYLE_, \ - INCBIN_CONCATENATE( \ - INCBIN_EVAL(INCBIN_STYLE), \ - INCBIN_CONCATENATE(_, TYPE))) - -/* Style lookup: returning string literal */ -#define INCBIN_STYLE_STRING(TYPE) \ - INCBIN_STRINGIZE( \ - INCBIN_STYLE_IDENT(TYPE)) \ - -/* Generate the global labels by indirectly invoking the macro with our style - * type and concatenating the name against them. */ -#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ - INCBIN_INVOKE( \ - INCBIN_GLOBAL, \ - INCBIN_CONCATENATE( \ - NAME, \ - INCBIN_INVOKE( \ - INCBIN_STYLE_IDENT, \ - TYPE))) \ - INCBIN_INVOKE( \ - INCBIN_TYPE, \ - INCBIN_CONCATENATE( \ - NAME, \ - INCBIN_INVOKE( \ - INCBIN_STYLE_IDENT, \ - TYPE))) - -/** - * @brief Externally reference binary data included in another translation unit. - * - * Produces three external symbols that reference the binary data included in - * another translation unit. - * - * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with - * "Data", as well as "End" and "Size" after. An example is provided below. - * - * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`. 
- * @param NAME The name given for the binary data - * - * @code - * INCBIN_EXTERN(Foo); - * - * // Now you have the following symbols: - * // extern const unsigned char Foo[]; - * // extern const unsigned char *const Foo; - * // extern const unsigned int Foo; - * @endcode - * - * You may specify a custom optional data type as well as the first argument. - * @code - * INCBIN_EXTERN(custom_type, Foo); - * - * // Now you have the following symbols: - * // extern const custom_type Foo[]; - * // extern const custom_type *const Foo; - * // extern const unsigned int Foo; - * @endcode - */ -#define INCBIN_EXTERN(...) \ - INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__) -#define INCBIN_EXTERN_1(NAME, ...) \ - INCBIN_EXTERN_2(unsigned char, NAME) -#define INCBIN_EXTERN_2(TYPE, NAME) \ - INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \ - INCBIN_CONCATENATE( \ - INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ - INCBIN_STYLE_IDENT(DATA))[]; \ - INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \ - INCBIN_CONCATENATE( \ - INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ - INCBIN_STYLE_IDENT(END)); \ - INCBIN_EXTERNAL const unsigned int \ - INCBIN_CONCATENATE( \ - INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ - INCBIN_STYLE_IDENT(SIZE)) - -/** - * @brief Externally reference textual data included in another translation unit. - * - * Produces three external symbols that reference the textual data included in - * another translation unit. - * - * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with - * "Data", as well as "End" and "Size" after. An example is provided below. 
- * - * @param NAME The name given for the textual data - * - * @code - * INCTXT_EXTERN(Foo); - * - * // Now you have the following symbols: - * // extern const char <prefix>FooData[]; - * // extern const char *const <prefix>FooEnd; - * // extern const unsigned int <prefix>FooSize; - * @endcode - */ -#define INCTXT_EXTERN(NAME) \ - INCBIN_EXTERN_2(char, NAME) - -/** - * @brief Include a binary file into the current translation unit. - * - * Includes a binary file into the current translation unit, producing three symbols - * for objects that encode the data and size respectively. - * - * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with - * "Data", as well as "End" and "Size" after. An example is provided below. - * - * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`. - * @param NAME The name to associate with this binary data (as an identifier.) - * @param FILENAME The file to include (as a string literal.) - * - * @code - * INCBIN(Icon, "icon.png"); - * - * // Now you have the following symbols: - * // const unsigned char <prefix>IconData[]; - * // const unsigned char *const <prefix>IconEnd; - * // const unsigned int <prefix>IconSize; - * @endcode - * - * You may specify a custom optional data type as well as the first argument. - * These macros are specialized by arity. - * @code - * INCBIN(custom_type, Icon, "icon.png"); - * - * // Now you have the following symbols: - * // const custom_type <prefix>IconData[]; - * // const custom_type *const <prefix>IconEnd; - * // const unsigned int <prefix>IconSize; - * @endcode - * - * @warning This must be used in global scope - * @warning The identifiers may be different if INCBIN_STYLE is not default - * - * To externally reference the data included by this in another translation unit - * please @see INCBIN_EXTERN. - */ -#ifdef _MSC_VER -# define INCBIN(NAME, FILENAME) \ - INCBIN_EXTERN(NAME) -#else -# define INCBIN(...) \ - INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__) -# if defined(__GNUC__) -# define INCBIN_1(...)
_Pragma("GCC error \"Single argument INCBIN not allowed\"") -# elif defined(__clang__) -# define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"") -# else -# define INCBIN_1(...) /* Cannot do anything here */ -# endif -# define INCBIN_2(NAME, FILENAME) \ - INCBIN_3(unsigned char, NAME, FILENAME) -# define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */) -# define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \ - __asm__(INCBIN_SECTION \ - INCBIN_GLOBAL_LABELS(NAME, DATA) \ - INCBIN_ALIGN_HOST \ - INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ - INCBIN_MACRO " \"" FILENAME "\"\n" \ - TERMINATOR \ - INCBIN_GLOBAL_LABELS(NAME, END) \ - INCBIN_ALIGN_BYTE \ - INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ - INCBIN_BYTE "1\n" \ - INCBIN_GLOBAL_LABELS(NAME, SIZE) \ - INCBIN_ALIGN_HOST \ - INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ - INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ - INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ - INCBIN_ALIGN_HOST \ - ".text\n" \ - ); \ - INCBIN_EXTERN(TYPE, NAME) -#endif - -/** - * @brief Include a textual file into the current translation unit. - * - * This behaves the same as INCBIN except it produces char compatible arrays - * and implicitly adds a null-terminator byte, thus the size of data included - * by this is one byte larger than that of INCBIN. - * - * Includes a textual file into the current translation unit, producing three - * symbols for objects that encode the data and size respectively. - * - * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with - * "Data", as well as "End" and "Size" after. An example is provided below. - * - * @param NAME The name to associate with this binary data (as an identifier.) 
- * @param FILENAME The file to include (as a string literal.) - * - * @code - * INCTXT(Readme, "readme.txt"); - * - * // Now you have the following symbols: - * // const char Readme[]; - * // const char *const Readme; - * // const unsigned int Readme; - * @endcode - * - * @warning This must be used in global scope - * @warning The identifiers may be different if INCBIN_STYLE is not default - * - * To externally reference the data included by this in another translation unit - * please @see INCBIN_EXTERN. - */ -#if defined(_MSC_VER) -# define INCTXT(NAME, FILENAME) \ - INCBIN_EXTERN(NAME) -#else -# define INCTXT(NAME, FILENAME) \ - INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n") -#endif - -#endif \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp deleted file mode 100644 index 9a7376efbaf3d7fd420ff215400b08047e603ff3..0000000000000000000000000000000000000000 --- a/src/main.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include - -#include "bitboard.h" -#include "misc.h" -#include "position.h" -#include "tune.h" -#include "uci.h" - -using namespace Stockfish; - -int main(int argc, char* argv[]) { - std::cout << engine_info() << std::endl; - - Bitboards::init(); - Position::init(); - - auto uci = std::make_unique(argc, argv); - - Tune::init(uci->engine_options()); - - uci->loop(); - - return 0; -} diff --git a/src/memory.cpp b/src/memory.cpp deleted file mode 100644 index 94a5993991b63685ca65b09d09e87f07af5cf9c1..0000000000000000000000000000000000000000 --- a/src/memory.cpp +++ /dev/null @@ -1,199 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "memory.h" - -#include - -#if __has_include("features.h") - #include -#endif - -#if defined(__linux__) && !defined(__ANDROID__) - #include -#endif - -#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \ - || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \ - || defined(__e2k__) - #define POSIXALIGNEDALLOC - #include -#endif - -#ifdef _WIN32 - #if _WIN32_WINNT < 0x0601 - #undef _WIN32_WINNT - #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes - #endif - - #ifndef NOMINMAX - #define NOMINMAX - #endif - - #include // std::hex, std::dec - #include // std::cerr - #include // std::endl - #include - -// The needed Windows API for processor groups could be missed from old Windows -// versions, so instead of calling them directly (forcing the linker to resolve -// the calls at compile time), try to load them at runtime. To do this we need -// first to define the corresponding function pointers. - -#endif - - -namespace Stockfish { - -// Wrappers for systems where the c++17 implementation does not guarantee the -// availability of aligned_alloc(). Memory allocated with std_aligned_alloc() -// must be freed with std_aligned_free(). 
- -void* std_aligned_alloc(size_t alignment, size_t size) { -#if defined(_ISOC11_SOURCE) - return aligned_alloc(alignment, size); -#elif defined(POSIXALIGNEDALLOC) - void* mem = nullptr; - posix_memalign(&mem, alignment, size); - return mem; -#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) - return _mm_malloc(size, alignment); -#elif defined(_WIN32) - return _aligned_malloc(size, alignment); -#else - return std::aligned_alloc(alignment, size); -#endif -} - -void std_aligned_free(void* ptr) { - -#if defined(POSIXALIGNEDALLOC) - free(ptr); -#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) - _mm_free(ptr); -#elif defined(_WIN32) - _aligned_free(ptr); -#else - free(ptr); -#endif -} - -// aligned_large_pages_alloc() will return suitably aligned memory, -// if possible using large pages. - -#if defined(_WIN32) - -static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) { - - return windows_try_with_large_page_priviliges( - [&](size_t largePageSize) { - // Round up size to full pages and allocate - allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); - return VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, - PAGE_READWRITE); - }, - []() { return (void*) nullptr; }); -} - -void* aligned_large_pages_alloc(size_t allocSize) { - - // Try to allocate large pages - void* mem = aligned_large_pages_alloc_windows(allocSize); - - // Fall back to regular, page-aligned, allocation if necessary - if (!mem) - mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); - - return mem; -} - -#else - -void* aligned_large_pages_alloc(size_t allocSize) { - - #if defined(__linux__) - constexpr size_t alignment = 2 * 1024 * 1024; // 2MB page size assumed - #else - constexpr size_t alignment = 4096; // small page size assumed - #endif - - // Round up to multiples of alignment - size_t size = ((allocSize + alignment - 1) / alignment) * alignment; - void* mem = 
std_aligned_alloc(alignment, size); - #if defined(MADV_HUGEPAGE) - madvise(mem, size, MADV_HUGEPAGE); - #endif - return mem; -} - -#endif - -bool has_large_pages() { - -#if defined(_WIN32) - - constexpr size_t page_size = 2 * 1024 * 1024; // 2MB page size assumed - void* mem = aligned_large_pages_alloc_windows(page_size); - if (mem == nullptr) - { - return false; - } - else - { - aligned_large_pages_free(mem); - return true; - } - -#elif defined(__linux__) - - #if defined(MADV_HUGEPAGE) - return true; - #else - return false; - #endif - -#else - - return false; - -#endif -} - - -// aligned_large_pages_free() will free the previously memory allocated -// by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr. - -#if defined(_WIN32) - -void aligned_large_pages_free(void* mem) { - - if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) - { - DWORD err = GetLastError(); - std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err - << std::dec << std::endl; - exit(EXIT_FAILURE); - } -} - -#else - -void aligned_large_pages_free(void* mem) { std_aligned_free(mem); } - -#endif -} // namespace Stockfish diff --git a/src/memory.h b/src/memory.h deleted file mode 100644 index 056b07c6d16593a634e53a6621a343bd43aea9f6..0000000000000000000000000000000000000000 --- a/src/memory.h +++ /dev/null @@ -1,333 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef MEMORY_H_INCLUDED -#define MEMORY_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include - -#include "types.h" - -#if defined(_WIN64) - - #if _WIN32_WINNT < 0x0601 - #undef _WIN32_WINNT - #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes - #endif - - #if !defined(NOMINMAX) - #define NOMINMAX - #endif - #include - - // Some Windows headers (RPC/old headers) define short macros such - // as 'small' expanding to 'char', which breaks identifiers in the code. - // Undefine those macros immediately after including . - #ifdef small - #undef small - #endif - - #include - -extern "C" { -using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE); -using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID); -using AdjustTokenPrivileges_t = - bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); -} -#endif - - -namespace Stockfish { - -void* std_aligned_alloc(size_t alignment, size_t size); -void std_aligned_free(void* ptr); - -// Memory aligned by page size, min alignment: 4096 bytes -void* aligned_large_pages_alloc(size_t size); -void aligned_large_pages_free(void* mem); - -bool has_large_pages(); - -// Destroys a single object that was created with placement new and passes -// its address to free_func. Does nothing when ptr is null. -template -void memory_deleter(T* ptr, FREE_FUNC free_func) { - if (!ptr) - return; - - // Explicitly needed to call the destructor - if constexpr (!std::is_trivially_destructible_v) - ptr->~T(); - - free_func(ptr); -} - -// Destroys the elements of an array that was created with placement new, then -// frees the block that starts at the element count stored in front of the array.
-template -void memory_deleter_array(T* ptr, FREE_FUNC free_func) { - if (!ptr) - return; - - - // Move back on the pointer to where the size is allocated - const size_t array_offset = std::max(sizeof(size_t), alignof(T)); - char* raw_memory = reinterpret_cast(ptr) - array_offset; - - if constexpr (!std::is_trivially_destructible_v) - { - const size_t size = *reinterpret_cast(raw_memory); - - // Explicitly call the destructor for each element in reverse order - for (size_t i = size; i-- > 0;) - ptr[i].~T(); - } - - free_func(raw_memory); -} - -// Allocates memory for a single object and places it there with placement new -template -inline std::enable_if_t, T*> memory_allocator(ALLOC_FUNC alloc_func, - Args&&... args) { - void* raw_memory = alloc_func(sizeof(T)); - ASSERT_ALIGNED(raw_memory, alignof(T)); - return new (raw_memory) T(std::forward(args)...); -} - -// Allocates memory for an array of unknown bound and places it there with placement new -template -inline std::enable_if_t, std::remove_extent_t*> -memory_allocator(ALLOC_FUNC alloc_func, size_t num) { - using ElementType = std::remove_extent_t; - - const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType)); - - // Save the array size in the memory location - char* raw_memory = - reinterpret_cast(alloc_func(array_offset + num * sizeof(ElementType))); - ASSERT_ALIGNED(raw_memory, alignof(T)); - - new (raw_memory) size_t(num); - - for (size_t i = 0; i < num; ++i) - new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType(); - - // Need to return the pointer at the start of the array so that - // the indexing in unique_ptr works. 
- return reinterpret_cast(raw_memory + array_offset); -} - -// -// -// aligned large page unique ptr -// -// - -template -struct LargePageDeleter { - void operator()(T* ptr) const { return memory_deleter(ptr, aligned_large_pages_free); } -}; - -template -struct LargePageArrayDeleter { - void operator()(T* ptr) const { return memory_deleter_array(ptr, aligned_large_pages_free); } -}; - -template -using LargePagePtr = - std::conditional_t, - std::unique_ptr>>, - std::unique_ptr>>; - -// make_unique_large_page for single objects -template -std::enable_if_t, LargePagePtr> make_unique_large_page(Args&&... args) { - static_assert(alignof(T) <= 4096, - "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); - - T* obj = memory_allocator(aligned_large_pages_alloc, std::forward(args)...); - - return LargePagePtr(obj); -} - -// make_unique_large_page for arrays of unknown bound -template -std::enable_if_t, LargePagePtr> make_unique_large_page(size_t num) { - using ElementType = std::remove_extent_t; - - static_assert(alignof(ElementType) <= 4096, - "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); - - ElementType* memory = memory_allocator(aligned_large_pages_alloc, num); - - return LargePagePtr(memory); -} - -// -// -// aligned unique ptr -// -// - -template -struct AlignedDeleter { - void operator()(T* ptr) const { return memory_deleter(ptr, std_aligned_free); } -}; - -template -struct AlignedArrayDeleter { - void operator()(T* ptr) const { return memory_deleter_array(ptr, std_aligned_free); } -}; - -template -using AlignedPtr = - std::conditional_t, - std::unique_ptr>>, - std::unique_ptr>>; - -// make_unique_aligned for single objects -template -std::enable_if_t, AlignedPtr> make_unique_aligned(Args&&... 
args) { - const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); }; - T* obj = memory_allocator(func, std::forward(args)...); - - return AlignedPtr(obj); -} - -// make_unique_aligned for arrays of unknown bound -template -std::enable_if_t, AlignedPtr> make_unique_aligned(size_t num) { - using ElementType = std::remove_extent_t; - - const auto func = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); }; - ElementType* memory = memory_allocator(func, num); - - return AlignedPtr(memory); -} - - -// Get the first aligned element of an array. -// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes, -// where N is the number of elements in the array. -template -T* align_ptr_up(T* ptr) { - static_assert(alignof(T) < Alignment); - - const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr)); - return reinterpret_cast( - reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); -} - -#if defined(_WIN32) - -template -auto windows_try_with_large_page_priviliges([[maybe_unused]] FuncYesT&& fyes, FuncNoT&& fno) { - - #if !defined(_WIN64) - return fno(); - #else - - HANDLE hProcessToken{}; - LUID luid{}; - - const size_t largePageSize = GetLargePageMinimum(); - if (!largePageSize) - return fno(); - - // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges - - HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll")); - - if (!hAdvapi32) - hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); - - auto OpenProcessToken_f = - OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken")); - if (!OpenProcessToken_f) - return fno(); - auto LookupPrivilegeValueA_f = - LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA")); - if (!LookupPrivilegeValueA_f) - return fno(); - auto AdjustTokenPrivileges_f = - AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges")); - if (!AdjustTokenPrivileges_f) 
- return fno(); - - // We need SeLockMemoryPrivilege, so try to enable it for the process - - if (!OpenProcessToken_f( // OpenProcessToken() - GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) - return fno(); - - if (!LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid)) - return fno(); - - TOKEN_PRIVILEGES tp{}; - TOKEN_PRIVILEGES prevTp{}; - DWORD prevTpLen = 0; - - tp.PrivilegeCount = 1; - tp.Privileges[0].Luid = luid; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - - // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() - // succeeds, we still need to query GetLastError() to ensure that the privileges - // were actually obtained. - - if (!AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, - &prevTpLen) - || GetLastError() != ERROR_SUCCESS) - return fno(); - - auto&& ret = fyes(largePageSize); - - // Privilege no longer needed, restore previous state - AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); - - CloseHandle(hProcessToken); - - return std::forward(ret); - - #endif -} - -#endif - -template -T load_as(const ByteT* buffer) { - static_assert(std::is_trivially_copyable::value, "Type must be trivially copyable"); - static_assert(sizeof(ByteT) == 1); - - T value; - std::memcpy(&value, buffer, sizeof(T)); - - return value; -} - -} // namespace Stockfish - -#endif // #ifndef MEMORY_H_INCLUDED diff --git a/src/misc.cpp b/src/misc.cpp deleted file mode 100644 index 2cb3ce5d492d981dc3da5cab9ff1a2fc18cf08d2..0000000000000000000000000000000000000000 --- a/src/misc.cpp +++ /dev/null @@ -1,549 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the 
License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "misc.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "types.h" - -namespace Stockfish { - -namespace { - -// Version number or dev. -constexpr std::string_view version = "dev"; - -// Our fancy logging facility. The trick here is to replace cin.rdbuf() and -// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We -// can toggle the logging of std::cout and std::cin at runtime whilst preserving -// usual I/O functionality, all without changing a single line of code! 
-// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81 - -struct Tie: public std::streambuf { // MSVC requires split streambuf for cin and cout - - Tie(std::streambuf* b, std::streambuf* l) : - buf(b), - logBuf(l) {} - - int sync() override { return logBuf->pubsync(), buf->pubsync(); } - int overflow(int c) override { return log(buf->sputc(char(c)), "<< "); } - int underflow() override { return buf->sgetc(); } - int uflow() override { return log(buf->sbumpc(), ">> "); } - - std::streambuf *buf, *logBuf; - - int log(int c, const char* prefix) { - - static int last = '\n'; // Single log file - - if (last == '\n') - logBuf->sputn(prefix, 3); - - return last = logBuf->sputc(char(c)); - } -}; - -class Logger { - - Logger() : - in(std::cin.rdbuf(), file.rdbuf()), - out(std::cout.rdbuf(), file.rdbuf()) {} - ~Logger() { start(""); } - - std::ofstream file; - Tie in, out; - - public: - static void start(const std::string& fname) { - - static Logger l; - - if (l.file.is_open()) - { - std::cout.rdbuf(l.out.buf); - std::cin.rdbuf(l.in.buf); - l.file.close(); - } - - if (!fname.empty()) - { - l.file.open(fname, std::ifstream::out); - - if (!l.file.is_open()) - { - std::cerr << "Unable to open debug log file " << fname << std::endl; - exit(EXIT_FAILURE); - } - - std::cin.rdbuf(&l.in); - std::cout.rdbuf(&l.out); - } - } -}; - -} // namespace - - -// Returns the full name of the current Stockfish version. -// -// For local dev compiles we try to append the commit SHA and -// commit date from git. 
If that fails only the local compilation -// date is set and "nogit" is specified: -// Stockfish dev-YYYYMMDD-SHA -// or -// Stockfish dev-YYYYMMDD-nogit -// -// For releases (non-dev builds) we only include the version number: -// Stockfish version -std::string engine_version_info() { - std::stringstream ss; - ss << "Stockfish " << version << std::setfill('0'); - - if constexpr (version == "dev") - { - ss << "-"; -#ifdef GIT_DATE - ss << stringify(GIT_DATE); -#else - constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); - - std::string month, day, year; - std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" - - date >> month >> day >> year; - ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4) - << std::setw(2) << std::setfill('0') << day; -#endif - - ss << "-"; - -#ifdef GIT_SHA - ss << stringify(GIT_SHA); -#else - ss << "nogit"; -#endif - } - - return ss.str(); -} - -std::string engine_info(bool to_uci) { - return engine_version_info() + (to_uci ? "\nid author " : " by ") - + "the Stockfish developers (see AUTHORS file)"; -} - - -// Returns a string trying to describe the compiler we use -std::string compiler_info() { - -#define make_version_string(major, minor, patch) \ - stringify(major) "." stringify(minor) "." 
stringify(patch) - - // Predefined macros hell: - // - // __GNUC__ Compiler is GCC, Clang or ICX - // __clang__ Compiler is Clang or ICX - // __INTEL_LLVM_COMPILER Compiler is ICX - // _MSC_VER Compiler is MSVC - // _WIN32 Building on Windows (any) - // _WIN64 Building on Windows 64 bit - - std::string compiler = "\nCompiled by : "; - -#if defined(__INTEL_LLVM_COMPILER) - compiler += "ICX "; - compiler += stringify(__INTEL_LLVM_COMPILER); -#elif defined(__clang__) - compiler += "clang++ "; - compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__); -#elif _MSC_VER - compiler += "MSVC "; - compiler += "(version "; - compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD); - compiler += ")"; -#elif defined(__e2k__) && defined(__LCC__) - #define dot_ver2(n) \ - compiler += char('.'); \ - compiler += char('0' + (n) / 10); \ - compiler += char('0' + (n) % 10); - - compiler += "MCST LCC "; - compiler += "(version "; - compiler += std::to_string(__LCC__ / 100); - dot_ver2(__LCC__ % 100) dot_ver2(__LCC_MINOR__) compiler += ")"; -#elif __GNUC__ - compiler += "g++ (GNUC) "; - compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); -#else - compiler += "Unknown compiler "; - compiler += "(unknown version)"; -#endif - -#if defined(__APPLE__) - compiler += " on Apple"; -#elif defined(__CYGWIN__) - compiler += " on Cygwin"; -#elif defined(__MINGW64__) - compiler += " on MinGW64"; -#elif defined(__MINGW32__) - compiler += " on MinGW32"; -#elif defined(__ANDROID__) - compiler += " on Android"; -#elif defined(__linux__) - compiler += " on Linux"; -#elif defined(_WIN64) - compiler += " on Microsoft Windows 64-bit"; -#elif defined(_WIN32) - compiler += " on Microsoft Windows 32-bit"; -#else - compiler += " on unknown system"; -#endif - - compiler += "\nCompilation architecture : "; -#if defined(ARCH) - compiler += stringify(ARCH); -#else - compiler += "(undefined architecture)"; -#endif - - compiler += "\nCompilation 
settings : "; - compiler += (Is64Bit ? "64bit" : "32bit"); -#if defined(USE_AVX512ICL) - compiler += " AVX512ICL"; -#endif -#if defined(USE_VNNI) - compiler += " VNNI"; -#endif -#if defined(USE_AVX512) - compiler += " AVX512"; -#endif - compiler += (HasPext ? " BMI2" : ""); -#if defined(USE_AVX2) - compiler += " AVX2"; -#endif -#if defined(USE_SSE41) - compiler += " SSE41"; -#endif -#if defined(USE_SSSE3) - compiler += " SSSE3"; -#endif -#if defined(USE_SSE2) - compiler += " SSE2"; -#endif -#if defined(USE_NEON_DOTPROD) - compiler += " NEON_DOTPROD"; -#elif defined(USE_NEON) - compiler += " NEON"; -#endif - compiler += (HasPopCnt ? " POPCNT" : ""); - -#if !defined(NDEBUG) - compiler += " DEBUG"; -#endif - - compiler += "\nCompiler __VERSION__ macro : "; -#ifdef __VERSION__ - compiler += __VERSION__; -#else - compiler += "(undefined macro)"; -#endif - - compiler += "\n"; - - return compiler; -} - - -// Debug functions used mainly to collect run-time statistics -constexpr int MaxDebugSlots = 32; - -namespace { - -template -struct DebugInfo { - std::array, N> data = {0}; - - [[nodiscard]] constexpr std::atomic& operator[](size_t index) { - assert(index < N); - return data[index]; - } - - constexpr DebugInfo& operator=(const DebugInfo& other) { - for (size_t i = 0; i < N; i++) - data[i].store(other.data[i].load()); - return *this; - } -}; - -struct DebugExtremes: public DebugInfo<3> { - DebugExtremes() { - data[1] = std::numeric_limits::min(); - data[2] = std::numeric_limits::max(); - } -}; - -std::array, MaxDebugSlots> hit; -std::array, MaxDebugSlots> mean; -std::array, MaxDebugSlots> stdev; -std::array, MaxDebugSlots> correl; -std::array extremes; - -} // namespace - -void dbg_hit_on(bool cond, int slot) { - - ++hit.at(slot)[0]; - if (cond) - ++hit.at(slot)[1]; -} - -void dbg_mean_of(int64_t value, int slot) { - - ++mean.at(slot)[0]; - mean.at(slot)[1] += value; -} - -void dbg_stdev_of(int64_t value, int slot) { - - ++stdev.at(slot)[0]; - stdev.at(slot)[1] += value; 
- stdev.at(slot)[2] += value * value; -} - -void dbg_extremes_of(int64_t value, int slot) { - ++extremes.at(slot)[0]; - - int64_t current_max = extremes.at(slot)[1].load(); - while (current_max < value && !extremes.at(slot)[1].compare_exchange_weak(current_max, value)) - {} - - int64_t current_min = extremes.at(slot)[2].load(); - while (current_min > value && !extremes.at(slot)[2].compare_exchange_weak(current_min, value)) - {} -} - -void dbg_correl_of(int64_t value1, int64_t value2, int slot) { - - ++correl.at(slot)[0]; - correl.at(slot)[1] += value1; - correl.at(slot)[2] += value1 * value1; - correl.at(slot)[3] += value2; - correl.at(slot)[4] += value2 * value2; - correl.at(slot)[5] += value1 * value2; -} - -void dbg_print() { - - int64_t n; - auto E = [&n](int64_t x) { return double(x) / n; }; - auto sqr = [](double x) { return x * x; }; - - for (int i = 0; i < MaxDebugSlots; ++i) - if ((n = hit[i][0])) - std::cerr << "Hit #" << i << ": Total " << n << " Hits " << hit[i][1] - << " Hit Rate (%) " << 100.0 * E(hit[i][1]) << std::endl; - - for (int i = 0; i < MaxDebugSlots; ++i) - if ((n = mean[i][0])) - { - std::cerr << "Mean #" << i << ": Total " << n << " Mean " << E(mean[i][1]) << std::endl; - } - - for (int i = 0; i < MaxDebugSlots; ++i) - if ((n = stdev[i][0])) - { - double r = sqrt(E(stdev[i][2]) - sqr(E(stdev[i][1]))); - std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl; - } - - for (int i = 0; i < MaxDebugSlots; ++i) - if ((n = extremes[i][0])) - { - std::cerr << "Extremity #" << i << ": Total " << n << " Min " << extremes[i][2] - << " Max " << extremes[i][1] << std::endl; - } - - for (int i = 0; i < MaxDebugSlots; ++i) - if ((n = correl[i][0])) - { - double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3])) - / (sqrt(E(correl[i][2]) - sqr(E(correl[i][1]))) - * sqrt(E(correl[i][4]) - sqr(E(correl[i][3])))); - std::cerr << "Correl. 
#" << i << ": Total " << n << " Coefficient " << r << std::endl; - } -} - -void dbg_clear() { - hit.fill({}); - mean.fill({}); - stdev.fill({}); - correl.fill({}); - extremes.fill({}); -} - -// Used to serialize access to std::cout -// to avoid multiple threads writing at the same time. -std::ostream& operator<<(std::ostream& os, SyncCout sc) { - - static std::mutex m; - - if (sc == IO_LOCK) - m.lock(); - - if (sc == IO_UNLOCK) - m.unlock(); - - return os; -} - -void sync_cout_start() { std::cout << IO_LOCK; } -void sync_cout_end() { std::cout << IO_UNLOCK; } - -// Hash function based on public domain MurmurHash64A, by Austin Appleby. -uint64_t hash_bytes(const char* data, size_t size) { - const uint64_t m = 0xc6a4a7935bd1e995ull; - const int r = 47; - - uint64_t h = size * m; - - const char* end = data + (size & ~(size_t) 7); - - for (const char* p = data; p != end; p += 8) - { - uint64_t k; - std::memcpy(&k, p, sizeof(k)); - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - } - - if (size & 7) - { - uint64_t k = 0; - for (int i = (size & 7) - 1; i >= 0; i--) - k = (k << 8) | (uint64_t) end[i]; - - h ^= k; - h *= m; - } - - h ^= h >> r; - h *= m; - h ^= h >> r; - - return h; -} - -// Trampoline helper to avoid moving Logger to misc.h -void start_logger(const std::string& fname) { Logger::start(fname); } - - -#ifdef _WIN32 - #include - #define GETCWD _getcwd -#else - #include - #define GETCWD getcwd -#endif - -size_t str_to_size_t(const std::string& s) { - unsigned long long value = std::stoull(s); - if (value > std::numeric_limits::max()) - std::exit(EXIT_FAILURE); - return static_cast(value); -} - -std::optional read_file_to_string(const std::string& path) { - std::ifstream f(path, std::ios_base::binary); - if (!f) - return std::nullopt; - return std::string(std::istreambuf_iterator(f), std::istreambuf_iterator()); -} - -void remove_whitespace(std::string& s) { - s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return std::isspace(c); }), 
s.end()); -} - -bool is_whitespace(std::string_view s) { - return std::all_of(s.begin(), s.end(), [](char c) { return std::isspace(c); }); -} - -std::string CommandLine::get_binary_directory(std::string argv0) { - std::string pathSeparator; - -#ifdef _WIN32 - pathSeparator = "\\"; - #ifdef _MSC_VER - // Under windows argv[0] may not have the extension. Also _get_pgmptr() had - // issues in some Windows 10 versions, so check returned values carefully. - char* pgmptr = nullptr; - if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) - argv0 = pgmptr; - #endif -#else - pathSeparator = "/"; -#endif - - // Extract the working directory - auto workingDirectory = CommandLine::get_working_directory(); - - // Extract the binary directory path from argv0 - auto binaryDirectory = argv0; - size_t pos = binaryDirectory.find_last_of("\\/"); - if (pos == std::string::npos) - binaryDirectory = "." + pathSeparator; - else - binaryDirectory.resize(pos + 1); - - // Pattern replacement: "./" at the start of path is replaced by the working directory - if (binaryDirectory.find("." 
+ pathSeparator) == 0) - binaryDirectory.replace(0, 1, workingDirectory); - - return binaryDirectory; -} - -std::string CommandLine::get_working_directory() { - std::string workingDirectory = ""; - char buff[40000]; - char* cwd = GETCWD(buff, 40000); - if (cwd) - workingDirectory = cwd; - - return workingDirectory; -} - - -} // namespace Stockfish diff --git a/src/misc.h b/src/misc.h deleted file mode 100644 index d1c368fdd2459b429deb6d9548e93c3cdbd17eeb..0000000000000000000000000000000000000000 --- a/src/misc.h +++ /dev/null @@ -1,538 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef MISC_H_INCLUDED -#define MISC_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include // IWYU pragma: keep -// IWYU pragma: no_include <__exception/terminate.h> -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if !defined(NO_PREFETCH) && (defined(_MSC_VER) || defined(__INTEL_COMPILER)) - #include -#endif - -#define stringify2(x) #x -#define stringify(x) stringify2(x) - -namespace Stockfish { - -std::string engine_version_info(); -std::string engine_info(bool to_uci = false); -std::string compiler_info(); - -// Prefetch hint enums for explicit call-site control. 
-enum class PrefetchRw { - READ, - WRITE -}; - -// NOTE: PrefetchLoc controls locality / cache level, not whether a prefetch -// is issued. In particular, PrefetchLoc::NONE maps to a non-temporal / -// lowest-locality prefetch (Intel: _MM_HINT_NTA, GCC/Clang: locality = 0) -// and therefore still performs a prefetch. To completely disable -// prefetching, define NO_PREFETCH so that prefetch() becomes a no-op. -enum class PrefetchLoc { - NONE, // Non-temporal / no cache locality (still issues a prefetch) - LOW, // Low locality (e.g. T2 / L2) - MODERATE, // Moderate locality (e.g. T1 / L1) - HIGH // High locality (e.g. T0 / closest cache) -}; - -// Preloads the given address into cache. This is a non-blocking -// function that doesn't stall the CPU waiting for data to be loaded from memory, -// which can be quite slow. -#ifdef NO_PREFETCH -template -void prefetch(const void*) {} -#elif defined(_MSC_VER) || defined(__INTEL_COMPILER) - -constexpr int get_intel_hint(PrefetchRw rw, PrefetchLoc loc) { - if (rw == PrefetchRw::WRITE) - { - #ifdef _MM_HINT_ET0 - return _MM_HINT_ET0; - #else - // Fallback when write-prefetch hint is not available: use T0 - return _MM_HINT_T0; - #endif - } - switch (loc) - { - case PrefetchLoc::NONE : - return _MM_HINT_NTA; - case PrefetchLoc::LOW : - return _MM_HINT_T2; - case PrefetchLoc::MODERATE : - return _MM_HINT_T1; - case PrefetchLoc::HIGH : - return _MM_HINT_T0; - default : - return _MM_HINT_T0; - } -} - -template -void prefetch(const void* addr) { - _mm_prefetch(static_cast(addr), get_intel_hint(RW, LOC)); -} -#else -template -void prefetch(const void* addr) { - __builtin_prefetch(addr, static_cast(RW), static_cast(LOC)); -} -#endif - -void start_logger(const std::string& fname); - -size_t str_to_size_t(const std::string& s); - -#if defined(__linux__) - -struct PipeDeleter { - void operator()(FILE* file) const { - if (file != nullptr) - { - pclose(file); - } - } -}; - -#endif - -// Reads the file as bytes. 
-// Returns std::nullopt if the file does not exist. -std::optional read_file_to_string(const std::string& path); - -void dbg_hit_on(bool cond, int slot = 0); -void dbg_mean_of(int64_t value, int slot = 0); -void dbg_stdev_of(int64_t value, int slot = 0); -void dbg_extremes_of(int64_t value, int slot = 0); -void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0); -void dbg_print(); -void dbg_clear(); - -using TimePoint = std::chrono::milliseconds::rep; // A value in milliseconds -static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); -inline TimePoint now() { - return std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); -} - -inline std::vector split(std::string_view s, std::string_view delimiter) { - std::vector res; - - if (s.empty()) - return res; - - size_t begin = 0; - for (;;) - { - const size_t end = s.find(delimiter, begin); - if (end == std::string::npos) - break; - - res.emplace_back(s.substr(begin, end - begin)); - begin = end + delimiter.size(); - } - - res.emplace_back(s.substr(begin)); - - return res; -} - -void remove_whitespace(std::string& s); -bool is_whitespace(std::string_view s); - -enum SyncCout { - IO_LOCK, - IO_UNLOCK -}; -std::ostream& operator<<(std::ostream&, SyncCout); - -#define sync_cout std::cout << IO_LOCK -#define sync_endl std::endl << IO_UNLOCK - -void sync_cout_start(); -void sync_cout_end(); - -// True if and only if the binary is compiled on a little-endian machine -static inline const std::uint16_t Le = 1; -static inline const bool IsLittleEndian = *reinterpret_cast(&Le) == 1; - - -template -class ValueList { - - public: - std::size_t size() const { return size_; } - int ssize() const { return int(size_); } - void push_back(const T& value) { - assert(size_ < MaxSize); - values_[size_++] = value; - } - const T* begin() const { return values_; } - const T* end() const { return values_ + size_; } - const T& operator[](int index) const { return 
values_[index]; } - - T* make_space(size_t count) { - T* result = &values_[size_]; - size_ += count; - assert(size_ <= MaxSize); - return result; - } - - private: - T values_[MaxSize]; - std::size_t size_ = 0; -}; - - -template -class MultiArray; - -namespace Detail { - -template -struct MultiArrayHelper { - using ChildType = MultiArray; -}; - -template -struct MultiArrayHelper { - using ChildType = T; -}; - -template -constexpr bool is_strictly_assignable_v = - std::is_assignable_v && (std::is_same_v || !std::is_convertible_v); - -} - -// MultiArray is a generic N-dimensional array. -// The template parameters (Size and Sizes) encode the dimensions of the array. -template -class MultiArray { - using ChildType = typename Detail::MultiArrayHelper::ChildType; - using ArrayType = std::array; - ArrayType data_; - - public: - using value_type = typename ArrayType::value_type; - using size_type = typename ArrayType::size_type; - using difference_type = typename ArrayType::difference_type; - using reference = typename ArrayType::reference; - using const_reference = typename ArrayType::const_reference; - using pointer = typename ArrayType::pointer; - using const_pointer = typename ArrayType::const_pointer; - using iterator = typename ArrayType::iterator; - using const_iterator = typename ArrayType::const_iterator; - using reverse_iterator = typename ArrayType::reverse_iterator; - using const_reverse_iterator = typename ArrayType::const_reverse_iterator; - - constexpr auto& at(size_type index) noexcept { return data_.at(index); } - constexpr const auto& at(size_type index) const noexcept { return data_.at(index); } - - constexpr auto& operator[](size_type index) noexcept { return data_[index]; } - constexpr const auto& operator[](size_type index) const noexcept { return data_[index]; } - - constexpr auto& front() noexcept { return data_.front(); } - constexpr const auto& front() const noexcept { return data_.front(); } - constexpr auto& back() noexcept { return 
data_.back(); } - constexpr const auto& back() const noexcept { return data_.back(); } - - auto* data() { return data_.data(); } - const auto* data() const { return data_.data(); } - - constexpr auto begin() noexcept { return data_.begin(); } - constexpr auto end() noexcept { return data_.end(); } - constexpr auto begin() const noexcept { return data_.begin(); } - constexpr auto end() const noexcept { return data_.end(); } - constexpr auto cbegin() const noexcept { return data_.cbegin(); } - constexpr auto cend() const noexcept { return data_.cend(); } - - constexpr auto rbegin() noexcept { return data_.rbegin(); } - constexpr auto rend() noexcept { return data_.rend(); } - constexpr auto rbegin() const noexcept { return data_.rbegin(); } - constexpr auto rend() const noexcept { return data_.rend(); } - constexpr auto crbegin() const noexcept { return data_.crbegin(); } - constexpr auto crend() const noexcept { return data_.crend(); } - - constexpr bool empty() const noexcept { return data_.empty(); } - constexpr size_type size() const noexcept { return data_.size(); } - constexpr size_type max_size() const noexcept { return data_.max_size(); } - - template - void fill(const U& v) { - static_assert(Detail::is_strictly_assignable_v, - "Cannot assign fill value to entry type"); - for (auto& ele : data_) - { - if constexpr (sizeof...(Sizes) == 0) - ele = v; - else - ele.fill(v); - } - } - - constexpr void swap(MultiArray& other) noexcept { data_.swap(other.data_); } -}; - - -// xorshift64star Pseudo-Random Number Generator -// This class is based on original code written and dedicated -// to the public domain by Sebastiano Vigna (2014). 
-// It has the following characteristics: -// -// - Outputs 64-bit numbers -// - Passes Dieharder and SmallCrush test batteries -// - Does not require warm-up, no zeroland to escape -// - Internal state is a single 64-bit integer -// - Period is 2^64 - 1 -// - Speed: 1.60 ns/call (Core i7 @3.40GHz) -// -// For further analysis see -// - -class PRNG { - - uint64_t s; - - uint64_t rand64() { - - s ^= s >> 12, s ^= s << 25, s ^= s >> 27; - return s * 2685821657736338717LL; - } - - public: - PRNG(uint64_t seed) : - s(seed) { - assert(seed); - } - - template - T rand() { - return T(rand64()); - } - - // Special generator used to fast init magic numbers. - // Output values only have 1/8th of their bits set on average. - template - T sparse_rand() { - return T(rand64() & rand64() & rand64()); - } -}; - -inline uint64_t mul_hi64(uint64_t a, uint64_t b) { -#if defined(__GNUC__) && defined(IS_64BIT) - __extension__ using uint128 = unsigned __int128; - return (uint128(a) * uint128(b)) >> 64; -#else - uint64_t aL = uint32_t(a), aH = a >> 32; - uint64_t bL = uint32_t(b), bH = b >> 32; - uint64_t c1 = (aL * bL) >> 32; - uint64_t c2 = aH * bL + c1; - uint64_t c3 = aL * bH + uint32_t(c2); - return aH * bH + (c2 >> 32) + (c3 >> 32); -#endif -} - -uint64_t hash_bytes(const char*, size_t); - -template -inline std::size_t get_raw_data_hash(const T& value) { - // We must have no padding bytes because we're reinterpreting as char - static_assert(std::has_unique_object_representations()); - - return static_cast( - hash_bytes(reinterpret_cast(&value), sizeof(value))); -} - -template -inline void hash_combine(std::size_t& seed, const T& v) { - std::size_t x; - // For primitive types we avoid using the default hasher, which may be - // nondeterministic across program invocations - if constexpr (std::is_integral()) - x = v; - else - x = std::hash{}(v); - seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2); -} - -inline std::uint64_t hash_string(const std::string& sv) { return 
hash_bytes(sv.data(), sv.size()); } - -template -class FixedString { - public: - FixedString() : - length_(0) { - data_[0] = '\0'; - } - - FixedString(const char* str) { - size_t len = std::strlen(str); - if (len > Capacity) - std::terminate(); - std::memcpy(data_, str, len); - length_ = len; - data_[length_] = '\0'; - } - - FixedString(const std::string& str) { - if (str.size() > Capacity) - std::terminate(); - std::memcpy(data_, str.data(), str.size()); - length_ = str.size(); - data_[length_] = '\0'; - } - - std::size_t size() const { return length_; } - std::size_t capacity() const { return Capacity; } - - const char* c_str() const { return data_; } - const char* data() const { return data_; } - - char& operator[](std::size_t i) { return data_[i]; } - - const char& operator[](std::size_t i) const { return data_[i]; } - - FixedString& operator+=(const char* str) { - size_t len = std::strlen(str); - if (length_ + len > Capacity) - std::terminate(); - std::memcpy(data_ + length_, str, len); - length_ += len; - data_[length_] = '\0'; - return *this; - } - - FixedString& operator+=(const FixedString& other) { return (*this += other.c_str()); } - - operator std::string() const { return std::string(data_, length_); } - - operator std::string_view() const { return std::string_view(data_, length_); } - - template - bool operator==(const T& other) const noexcept { - return (std::string_view) (*this) == other; - } - - template - bool operator!=(const T& other) const noexcept { - return (std::string_view) (*this) != other; - } - - void clear() { - length_ = 0; - data_[0] = '\0'; - } - - private: - char data_[Capacity + 1]; // +1 for null terminator - std::size_t length_; -}; - -struct CommandLine { - public: - CommandLine(int _argc, char** _argv) : - argc(_argc), - argv(_argv) {} - - static std::string get_binary_directory(std::string argv0); - static std::string get_working_directory(); - - int argc; - char** argv; -}; - -namespace Utility { - -template -void 
move_to_front(std::vector& vec, Predicate pred) { - auto it = std::find_if(vec.begin(), vec.end(), pred); - - if (it != vec.end()) - { - std::rotate(vec.begin(), it, it + 1); - } -} -} - -#if defined(__GNUC__) - #define sf_always_inline __attribute__((always_inline)) -#elif defined(_MSC_VER) - #define sf_always_inline __forceinline -#else - // do nothing for other compilers - #define sf_always_inline -#endif - -#if defined(__clang__) - #define sf_assume(cond) __builtin_assume(cond) -#elif defined(__GNUC__) - #if __GNUC__ >= 13 - #define sf_assume(cond) __attribute__((assume(cond))) - #else - #define sf_assume(cond) \ - do \ - { \ - if (!(cond)) \ - __builtin_unreachable(); \ - } while (0) - #endif -#elif defined(_MSC_VER) - #define sf_assume(cond) __assume(cond) -#else - // do nothing for other compilers - #define sf_assume(cond) -#endif - -#ifdef __GNUC__ - #define sf_unreachable() __builtin_unreachable() -#elif defined(_MSC_VER) - #define sf_unreachable() __assume(0) -#else - #define sf_unreachable() -#endif - -} // namespace Stockfish - -template -struct std::hash> { - std::size_t operator()(const Stockfish::FixedString& fstr) const noexcept { - return Stockfish::hash_bytes(fstr.data(), fstr.size()); - } -}; - -#endif // #ifndef MISC_H_INCLUDED diff --git a/src/movegen.cpp b/src/movegen.cpp deleted file mode 100644 index e63707a1ff4baa70fc0a64d3feb2b09505f0e14c..0000000000000000000000000000000000000000 --- a/src/movegen.cpp +++ /dev/null @@ -1,312 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "movegen.h" - -#include -#include - -#include "bitboard.h" -#include "position.h" - -#if defined(USE_AVX512ICL) - #include - #include - #include -#endif - -namespace Stockfish { - -namespace { - -#if defined(USE_AVX512ICL) - -inline Move* write_moves(Move* moveList, uint32_t mask, __m512i vector) { - // Avoid _mm512_mask_compressstoreu_epi16() as it's 256 uOps on Zen4 - _mm512_storeu_si512(reinterpret_cast<__m512i*>(moveList), - _mm512_maskz_compress_epi16(mask, vector)); - return moveList + popcount(mask); -} - -template -inline Move* splat_pawn_moves(Move* moveList, Bitboard to_bb) { - alignas(64) static constexpr auto SPLAT_TABLE = [] { - std::array table{}; - for (int i = 0; i < 64; i++) - { - Square from{uint8_t(std::clamp(i - offset, 0, 63))}; - table[i] = {Move(from, Square{uint8_t(i)})}; - } - return table; - }(); - - auto table = reinterpret_cast(SPLAT_TABLE.data()); - - moveList = - write_moves(moveList, static_cast(to_bb >> 0), _mm512_load_si512(table + 0)); - moveList = - write_moves(moveList, static_cast(to_bb >> 32), _mm512_load_si512(table + 1)); - - return moveList; -} - -inline Move* splat_moves(Move* moveList, Square from, Bitboard to_bb) { - alignas(64) static constexpr auto SPLAT_TABLE = [] { - std::array table{}; - for (uint8_t i = 0; i < 64; i++) - table[i] = {Move(SQUARE_ZERO, Square{i})}; - return table; - }(); - - __m512i fromVec = _mm512_set1_epi16(Move(from, SQUARE_ZERO).raw()); - - auto table = reinterpret_cast(SPLAT_TABLE.data()); - - moveList = write_moves(moveList, static_cast(to_bb >> 0), - _mm512_or_si512(_mm512_load_si512(table + 0), fromVec)); - moveList = 
write_moves(moveList, static_cast(to_bb >> 32), - _mm512_or_si512(_mm512_load_si512(table + 1), fromVec)); - - return moveList; -} - -#else - -template -inline Move* splat_pawn_moves(Move* moveList, Bitboard to_bb) { - while (to_bb) - { - Square to = pop_lsb(to_bb); - *moveList++ = Move(to - offset, to); - } - return moveList; -} - -inline Move* splat_moves(Move* moveList, Square from, Bitboard to_bb) { - while (to_bb) - *moveList++ = Move(from, pop_lsb(to_bb)); - return moveList; -} - -#endif - -template -Move* make_promotions(Move* moveList, [[maybe_unused]] Square to) { - - constexpr bool all = Type == EVASIONS || Type == NON_EVASIONS; - - if constexpr (Type == CAPTURES || all) - *moveList++ = Move::make(to - D, to, QUEEN); - - if constexpr ((Type == CAPTURES && Enemy) || (Type == QUIETS && !Enemy) || all) - { - *moveList++ = Move::make(to - D, to, ROOK); - *moveList++ = Move::make(to - D, to, BISHOP); - *moveList++ = Move::make(to - D, to, KNIGHT); - } - - return moveList; -} - - -template -Move* generate_pawn_moves(const Position& pos, Move* moveList, Bitboard target) { - - constexpr Color Them = ~Us; - constexpr Bitboard TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB); - constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB); - constexpr Direction Up = pawn_push(Us); - constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST); - constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST); - - const Bitboard emptySquares = ~pos.pieces(); - const Bitboard enemies = Type == EVASIONS ? 
pos.checkers() : pos.pieces(Them); - - Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB; - Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB; - - // Single and double pawn pushes, no promotions - if constexpr (Type != CAPTURES) - { - Bitboard b1 = shift(pawnsNotOn7) & emptySquares; - Bitboard b2 = shift(b1 & TRank3BB) & emptySquares; - - if constexpr (Type == EVASIONS) // Consider only blocking squares - { - b1 &= target; - b2 &= target; - } - - moveList = splat_pawn_moves(moveList, b1); - moveList = splat_pawn_moves(moveList, b2); - } - - // Promotions and underpromotions - if (pawnsOn7) - { - Bitboard b1 = shift(pawnsOn7) & enemies; - Bitboard b2 = shift(pawnsOn7) & enemies; - Bitboard b3 = shift(pawnsOn7) & emptySquares; - - if constexpr (Type == EVASIONS) - b3 &= target; - - while (b1) - moveList = make_promotions(moveList, pop_lsb(b1)); - - while (b2) - moveList = make_promotions(moveList, pop_lsb(b2)); - - while (b3) - moveList = make_promotions(moveList, pop_lsb(b3)); - } - - // Standard and en passant captures - if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) - { - Bitboard b1 = shift(pawnsNotOn7) & enemies; - Bitboard b2 = shift(pawnsNotOn7) & enemies; - - moveList = splat_pawn_moves(moveList, b1); - moveList = splat_pawn_moves(moveList, b2); - - if (pos.ep_square() != SQ_NONE) - { - assert(rank_of(pos.ep_square()) == relative_rank(Us, RANK_6)); - - // An en passant capture cannot resolve a discovered check - if (Type == EVASIONS && (target & (pos.ep_square() + Up))) - return moveList; - - b1 = pawnsNotOn7 & attacks_bb(pos.ep_square(), Them); - - assert(b1); - - while (b1) - *moveList++ = Move::make(pop_lsb(b1), pos.ep_square()); - } - } - - return moveList; -} - - -template -Move* generate_moves(const Position& pos, Move* moveList, Bitboard target) { - - static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()"); - - Bitboard bb = pos.pieces(Us, Pt); - - while (bb) - { - Square from = 
pop_lsb(bb); - Bitboard b = attacks_bb(from, pos.pieces()) & target; - - moveList = splat_moves(moveList, from, b); - } - - return moveList; -} - - -template -Move* generate_all(const Position& pos, Move* moveList) { - - static_assert(Type != LEGAL, "Unsupported type in generate_all()"); - - const Square ksq = pos.square(Us); - Bitboard target; - - // Skip generating non-king moves when in double check - if (Type != EVASIONS || !more_than_one(pos.checkers())) - { - target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) - : Type == NON_EVASIONS ? ~pos.pieces(Us) - : Type == CAPTURES ? pos.pieces(~Us) - : ~pos.pieces(); // QUIETS - - moveList = generate_pawn_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - } - - Bitboard b = attacks_bb(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target); - - moveList = splat_moves(moveList, ksq, b); - - if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING)) - for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE}) - if (!pos.castling_impeded(cr) && pos.can_castle(cr)) - *moveList++ = Move::make(ksq, pos.castling_rook_square(cr)); - - return moveList; -} - -} // namespace - - -// Generates all pseudo-legal captures plus queen promotions -// Generates all pseudo-legal non-captures and underpromotions -// Generates all pseudo-legal check evasions -// Generates all pseudo-legal captures and non-captures -// -// Returns a pointer to the end of the move list. -template -Move* generate(const Position& pos, Move* moveList) { - - static_assert(Type != LEGAL, "Unsupported type in generate()"); - assert((Type == EVASIONS) == bool(pos.checkers())); - - Color us = pos.side_to_move(); - - return us == WHITE ? 
generate_all(pos, moveList) - : generate_all(pos, moveList); -} - -// Explicit template instantiations -template Move* generate(const Position&, Move*); -template Move* generate(const Position&, Move*); -template Move* generate(const Position&, Move*); -template Move* generate(const Position&, Move*); - -// generate generates all the legal moves in the given position - -template<> -Move* generate(const Position& pos, Move* moveList) { - - Color us = pos.side_to_move(); - Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us); - Square ksq = pos.square(us); - Move* cur = moveList; - - moveList = - pos.checkers() ? generate(pos, moveList) : generate(pos, moveList); - while (cur != moveList) - if (((pinned & cur->from_sq()) || cur->from_sq() == ksq || cur->type_of() == EN_PASSANT) - && !pos.legal(*cur)) - *cur = *(--moveList); - else - ++cur; - - return moveList; -} - -} // namespace Stockfish diff --git a/src/movegen.h b/src/movegen.h deleted file mode 100644 index 7f209f92a7eee591a65f75223f1ad8f261f7781a..0000000000000000000000000000000000000000 --- a/src/movegen.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef MOVEGEN_H_INCLUDED -#define MOVEGEN_H_INCLUDED - -#include // IWYU pragma: keep -#include - -#include "types.h" - -namespace Stockfish { - -class Position; - -enum GenType { - CAPTURES, - QUIETS, - EVASIONS, - NON_EVASIONS, - LEGAL -}; - -struct ExtMove: public Move { - int value; - - void operator=(Move m) { data = m.raw(); } - - // Inhibit unwanted implicit conversions to Move - // with an ambiguity that yields to a compile error. - operator float() const = delete; -}; - -inline bool operator<(const ExtMove& f, const ExtMove& s) { return f.value < s.value; } - -template -Move* generate(const Position& pos, Move* moveList); - -// The MoveList struct wraps the generate() function and returns a convenient -// list of moves. Using MoveList is sometimes preferable to directly calling -// the lower level generate() function. -template -struct MoveList { - - explicit MoveList(const Position& pos) : - last(generate(pos, moveList)) {} - const Move* begin() const { return moveList; } - const Move* end() const { return last; } - size_t size() const { return last - moveList; } - bool contains(Move move) const { return std::find(begin(), end(), move) != end(); } - - private: - Move moveList[MAX_MOVES], *last; -}; - -} // namespace Stockfish - -#endif // #ifndef MOVEGEN_H_INCLUDED diff --git a/src/movepick.cpp b/src/movepick.cpp deleted file mode 100644 index 23b7facbb96a70fc77a84b9db9875ab1edab6a3a..0000000000000000000000000000000000000000 --- a/src/movepick.cpp +++ /dev/null @@ -1,313 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "movepick.h" - -#include -#include -#include - -#include "bitboard.h" -#include "misc.h" -#include "position.h" - -namespace Stockfish { - -namespace { - -enum Stages { - // generate main search moves - MAIN_TT, - CAPTURE_INIT, - GOOD_CAPTURE, - QUIET_INIT, - GOOD_QUIET, - BAD_CAPTURE, - BAD_QUIET, - - // generate evasion moves - EVASION_TT, - EVASION_INIT, - EVASION, - - // generate probcut moves - PROBCUT_TT, - PROBCUT_INIT, - PROBCUT, - - // generate qsearch moves - QSEARCH_TT, - QCAPTURE_INIT, - QCAPTURE -}; - - -// Sort moves in descending order up to and including a given limit. -// The order of moves smaller than the limit is left unspecified. -void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { - - for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p) - if (p->value >= limit) - { - ExtMove tmp = *p, *q; - *p = *++sortedEnd; - for (q = sortedEnd; q != begin && *(q - 1) < tmp; --q) - *q = *(q - 1); - *q = tmp; - } -} - -} // namespace - - -// Constructors of the MovePicker class. As arguments, we pass information -// to decide which class of moves to emit, to help sorting the (presumably) -// good moves first, and how important move ordering is at the current node. 
- -// MovePicker constructor for the main search and for the quiescence search -MovePicker::MovePicker(const Position& p, - Move ttm, - Depth d, - const ButterflyHistory* mh, - const LowPlyHistory* lph, - const CapturePieceToHistory* cph, - const PieceToHistory** ch, - const SharedHistories* sh, - int pl) : - pos(p), - mainHistory(mh), - lowPlyHistory(lph), - captureHistory(cph), - continuationHistory(ch), - sharedHistory(sh), - ttMove(ttm), - depth(d), - ply(pl) { - - if (pos.checkers()) - stage = EVASION_TT + !(ttm && pos.pseudo_legal(ttm)); - - else - stage = (depth > 0 ? MAIN_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm)); -} - -// MovePicker constructor for ProbCut: we generate captures with Static Exchange -// Evaluation (SEE) greater than or equal to the given threshold. -MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) : - pos(p), - captureHistory(cph), - ttMove(ttm), - threshold(th) { - assert(!pos.checkers()); - - stage = PROBCUT_TT + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm)); -} - -// Assigns a numerical value to each move in a list, used for sorting. -// Captures are ordered by Most Valuable Victim (MVV), preferring captures -// with a good history. Quiets moves are ordered using the history tables. 
-template -ExtMove* MovePicker::score(MoveList& ml) { - - static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); - - Color us = pos.side_to_move(); - - [[maybe_unused]] Bitboard threatByLesser[KING + 1]; - if constexpr (Type == QUIETS) - { - threatByLesser[PAWN] = 0; - threatByLesser[KNIGHT] = threatByLesser[BISHOP] = pos.attacks_by(~us); - threatByLesser[ROOK] = - pos.attacks_by(~us) | pos.attacks_by(~us) | threatByLesser[KNIGHT]; - threatByLesser[QUEEN] = pos.attacks_by(~us) | threatByLesser[ROOK]; - threatByLesser[KING] = 0; - } - - ExtMove* it = cur; - for (auto move : ml) - { - ExtMove& m = *it++; - m = move; - - const Square from = m.from_sq(); - const Square to = m.to_sq(); - const Piece pc = pos.moved_piece(m); - const PieceType pt = type_of(pc); - const Piece capturedPiece = pos.piece_on(to); - - if constexpr (Type == CAPTURES) - m.value = (*captureHistory)[pc][to][type_of(capturedPiece)] - + 7 * int(PieceValue[capturedPiece]); - - else if constexpr (Type == QUIETS) - { - // histories - m.value = 2 * (*mainHistory)[us][m.raw()]; - m.value += 2 * sharedHistory->pawn_entry(pos)[pc][to]; - m.value += (*continuationHistory[0])[pc][to]; - m.value += (*continuationHistory[1])[pc][to]; - m.value += (*continuationHistory[2])[pc][to]; - m.value += (*continuationHistory[3])[pc][to]; - m.value += (*continuationHistory[5])[pc][to]; - - // bonus for checks - m.value += (bool(pos.check_squares(pt) & to) && pos.see_ge(m, -75)) * 16384; - - // penalty for moving to a square threatened by a lesser piece - // or bonus for escaping an attack by a lesser piece. 
- int v = 20 * (bool(threatByLesser[pt] & from) - bool(threatByLesser[pt] & to)); - m.value += PieceValue[pt] * v; - - - if (ply < LOW_PLY_HISTORY_SIZE) - m.value += 8 * (*lowPlyHistory)[ply][m.raw()] / (1 + ply); - } - - else // Type == EVASIONS - { - if (pos.capture_stage(m)) - m.value = PieceValue[capturedPiece] + (1 << 28); - else - m.value = (*mainHistory)[us][m.raw()] + (*continuationHistory[0])[pc][to]; - } - } - return it; -} - -// Returns the next move satisfying a predicate function. -// This never returns the TT move, as it was emitted before. -template -Move MovePicker::select(Pred filter) { - - for (; cur < endCur; ++cur) - if (*cur != ttMove && filter()) - return *cur++; - - return Move::none(); -} - -// This is the most important method of the MovePicker class. We emit one -// new pseudo-legal move on every call until there are no more moves left, -// picking the move with the highest score from a list of generated moves. -Move MovePicker::next_move() { - - constexpr int goodQuietThreshold = -14000; -top: - switch (stage) - { - - case MAIN_TT : - case EVASION_TT : - case QSEARCH_TT : - case PROBCUT_TT : - ++stage; - return ttMove; - - case CAPTURE_INIT : - case PROBCUT_INIT : - case QCAPTURE_INIT : { - MoveList ml(pos); - - cur = endBadCaptures = moves; - endCur = endCaptures = score(ml); - - partial_insertion_sort(cur, endCur, std::numeric_limits::min()); - ++stage; - goto top; - } - - case GOOD_CAPTURE : - if (select([&]() { - if (pos.see_ge(*cur, -cur->value / 18)) - return true; - std::swap(*endBadCaptures++, *cur); - return false; - })) - return *(cur - 1); - - ++stage; - [[fallthrough]]; - - case QUIET_INIT : - if (!skipQuiets) - { - MoveList ml(pos); - - endCur = endGenerated = score(ml); - - partial_insertion_sort(cur, endCur, -3560 * depth); - } - - ++stage; - [[fallthrough]]; - - case GOOD_QUIET : - if (!skipQuiets && select([&]() { return cur->value > goodQuietThreshold; })) - return *(cur - 1); - - // Prepare the pointers to loop over the 
bad captures - cur = moves; - endCur = endBadCaptures; - - ++stage; - [[fallthrough]]; - - case BAD_CAPTURE : - if (select([]() { return true; })) - return *(cur - 1); - - // Prepare the pointers to loop over quiets again - cur = endCaptures; - endCur = endGenerated; - - ++stage; - [[fallthrough]]; - - case BAD_QUIET : - if (!skipQuiets) - return select([&]() { return cur->value <= goodQuietThreshold; }); - - return Move::none(); - - case EVASION_INIT : { - MoveList ml(pos); - - cur = moves; - endCur = endGenerated = score(ml); - - partial_insertion_sort(cur, endCur, std::numeric_limits::min()); - ++stage; - [[fallthrough]]; - } - - case EVASION : - case QCAPTURE : - return select([]() { return true; }); - - case PROBCUT : - return select([&]() { return pos.see_ge(*cur, threshold); }); - } - - assert(false); - return Move::none(); // Silence warning -} - -void MovePicker::skip_quiet_moves() { skipQuiets = true; } - -} // namespace Stockfish diff --git a/src/movepick.h b/src/movepick.h deleted file mode 100644 index 08bd9a539154ffd93ed2021aeec8e72fcff8881d..0000000000000000000000000000000000000000 --- a/src/movepick.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef MOVEPICK_H_INCLUDED -#define MOVEPICK_H_INCLUDED - -#include "history.h" -#include "movegen.h" -#include "types.h" - -namespace Stockfish { - -class Position; - -// The MovePicker class is used to pick one pseudo-legal move at a time from the -// current position. The most important method is next_move(), which emits one -// new pseudo-legal move on every call, until there are no moves left, when -// Move::none() is returned. In order to improve the efficiency of the alpha-beta -// algorithm, MovePicker attempts to return the moves which are most likely to get -// a cut-off first. -class MovePicker { - - public: - MovePicker(const MovePicker&) = delete; - MovePicker& operator=(const MovePicker&) = delete; - MovePicker(const Position&, - Move, - Depth, - const ButterflyHistory*, - const LowPlyHistory*, - const CapturePieceToHistory*, - const PieceToHistory**, - const SharedHistories*, - int); - MovePicker(const Position&, Move, int, const CapturePieceToHistory*); - Move next_move(); - void skip_quiet_moves(); - - private: - template - Move select(Pred); - template - ExtMove* score(MoveList&); - ExtMove* begin() { return cur; } - ExtMove* end() { return endCur; } - - const Position& pos; - const ButterflyHistory* mainHistory; - const LowPlyHistory* lowPlyHistory; - const CapturePieceToHistory* captureHistory; - const PieceToHistory** continuationHistory; - const SharedHistories* sharedHistory; - Move ttMove; - ExtMove * cur, *endCur, *endBadCaptures, *endCaptures, *endGenerated; - int stage; - int threshold; - Depth depth; - int ply; - bool skipQuiets = false; - ExtMove moves[MAX_MOVES]; -}; - -} // namespace Stockfish - -#endif // #ifndef MOVEPICK_H_INCLUDED diff --git a/src/nnue/features/full_threats.cpp b/src/nnue/features/full_threats.cpp deleted file mode 100644 index 03ad158e6c149dbd6ba3adf33a21ed18a2a3cf7f..0000000000000000000000000000000000000000 --- a/src/nnue/features/full_threats.cpp +++ /dev/null @@ -1,343 +0,0 @@ -/* - Stockfish, a UCI 
chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -//Definition of input features FullThreats of NNUE evaluation function - -#include "full_threats.h" - -#include -#include -#include -#include -#include - -#include "../../bitboard.h" -#include "../../misc.h" -#include "../../position.h" -#include "../../types.h" -#include "../nnue_common.h" - -namespace Stockfish::Eval::NNUE::Features { - -struct HelperOffsets { - int cumulativePieceOffset, cumulativeOffset; -}; - -constexpr std::array AllPieces = { - W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, - B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING, -}; - -template -constexpr auto make_piece_indices_type() { - static_assert(PT != PieceType::PAWN); - - std::array, SQUARE_NB> out{}; - - for (Square from = SQ_A1; from <= SQ_H8; ++from) - { - Bitboard attacks = PseudoAttacks[PT][from]; - - for (Square to = SQ_A1; to <= SQ_H8; ++to) - { - out[from][to] = constexpr_popcount(((1ULL << to) - 1) & attacks); - } - } - - return out; -} - -template -constexpr auto make_piece_indices_piece() { - static_assert(type_of(P) == PieceType::PAWN); - - std::array, SQUARE_NB> out{}; - - constexpr Color C = color_of(P); - - for (Square from = SQ_A1; from <= SQ_H8; ++from) - { - Bitboard attacks = PseudoAttacks[C][from]; - - for (Square to = SQ_A1; to <= SQ_H8; ++to) - { - 
out[from][to] = constexpr_popcount(((1ULL << to) - 1) & attacks); - } - } - - return out; -} - -constexpr auto index_lut2_array() { - constexpr auto KNIGHT_ATTACKS = make_piece_indices_type(); - constexpr auto BISHOP_ATTACKS = make_piece_indices_type(); - constexpr auto ROOK_ATTACKS = make_piece_indices_type(); - constexpr auto QUEEN_ATTACKS = make_piece_indices_type(); - constexpr auto KING_ATTACKS = make_piece_indices_type(); - - std::array, SQUARE_NB>, PIECE_NB> indices{}; - - indices[W_PAWN] = make_piece_indices_piece(); - indices[B_PAWN] = make_piece_indices_piece(); - - indices[W_KNIGHT] = KNIGHT_ATTACKS; - indices[B_KNIGHT] = KNIGHT_ATTACKS; - - indices[W_BISHOP] = BISHOP_ATTACKS; - indices[B_BISHOP] = BISHOP_ATTACKS; - - indices[W_ROOK] = ROOK_ATTACKS; - indices[B_ROOK] = ROOK_ATTACKS; - - indices[W_QUEEN] = QUEEN_ATTACKS; - indices[B_QUEEN] = QUEEN_ATTACKS; - - indices[W_KING] = KING_ATTACKS; - indices[B_KING] = KING_ATTACKS; - - return indices; -} - -constexpr auto init_threat_offsets() { - std::array indices{}; - std::array, PIECE_NB> offsets{}; - - int cumulativeOffset = 0; - for (Piece piece : AllPieces) - { - int pieceIdx = piece; - int cumulativePieceOffset = 0; - - for (Square from = SQ_A1; from <= SQ_H8; ++from) - { - offsets[pieceIdx][from] = cumulativePieceOffset; - - if (type_of(piece) != PAWN) - { - Bitboard attacks = PseudoAttacks[type_of(piece)][from]; - cumulativePieceOffset += constexpr_popcount(attacks); - } - - else if (from >= SQ_A2 && from <= SQ_H7) - { - Bitboard attacks = (pieceIdx < 8) ? 
pawn_attacks_bb(square_bb(from)) - : pawn_attacks_bb(square_bb(from)); - cumulativePieceOffset += constexpr_popcount(attacks); - } - } - - indices[pieceIdx] = {cumulativePieceOffset, cumulativeOffset}; - - cumulativeOffset += numValidTargets[pieceIdx] * cumulativePieceOffset; - } - - return std::pair{indices, offsets}; -} - -constexpr auto helper_offsets = init_threat_offsets().first; -// Lookup array for indexing threats -constexpr auto offsets = init_threat_offsets().second; - -constexpr auto init_index_luts() { - std::array, PIECE_NB>, PIECE_NB> indices{}; - - for (Piece attacker : AllPieces) - { - for (Piece attacked : AllPieces) - { - bool enemy = (attacker ^ attacked) == 8; - PieceType attackerType = type_of(attacker); - PieceType attackedType = type_of(attacked); - - int map = FullThreats::map[attackerType - 1][attackedType - 1]; - bool semi_excluded = attackerType == attackedType && (enemy || attackerType != PAWN); - IndexType feature = helper_offsets[attacker].cumulativeOffset - + (color_of(attacked) * (numValidTargets[attacker] / 2) + map) - * helper_offsets[attacker].cumulativePieceOffset; - - bool excluded = map < 0; - indices[attacker][attacked][0] = excluded ? FullThreats::Dimensions : feature; - indices[attacker][attacked][1] = - excluded || semi_excluded ? 
FullThreats::Dimensions : feature; - } - } - - return indices; -} - -// The final index is calculated from summing data found in these two LUTs, as well -// as offsets[attacker][from] - -// [attacker][attacked][from < to] -constexpr auto index_lut1 = init_index_luts(); -// [attacker][from][to] -constexpr auto index_lut2 = index_lut2_array(); - -// Index of a feature for a given king position and another piece on some square -inline sf_always_inline IndexType FullThreats::make_index( - Color perspective, Piece attacker, Square from, Square to, Piece attacked, Square ksq) { - const std::int8_t orientation = OrientTBL[ksq] ^ (56 * perspective); - unsigned from_oriented = uint8_t(from) ^ orientation; - unsigned to_oriented = uint8_t(to) ^ orientation; - - std::int8_t swap = 8 * perspective; - unsigned attacker_oriented = attacker ^ swap; - unsigned attacked_oriented = attacked ^ swap; - - return index_lut1[attacker_oriented][attacked_oriented][from_oriented < to_oriented] - + offsets[attacker_oriented][from_oriented] - + index_lut2[attacker_oriented][from_oriented][to_oriented]; -} - -// Get a list of indices for active features in ascending order - -void FullThreats::append_active_indices(Color perspective, const Position& pos, IndexList& active) { - Square ksq = pos.square(perspective); - Bitboard occupied = pos.pieces(); - - for (Color color : {WHITE, BLACK}) - { - for (PieceType pt = PAWN; pt < KING; ++pt) - { - Color c = Color(perspective ^ color); - Piece attacker = make_piece(c, pt); - Bitboard bb = pos.pieces(c, pt); - - if (pt == PAWN) - { - auto right = (c == WHITE) ? NORTH_EAST : SOUTH_WEST; - auto left = (c == WHITE) ? NORTH_WEST : SOUTH_EAST; - auto attacks_left = - ((c == WHITE) ? shift(bb) : shift(bb)) & occupied; - auto attacks_right = - ((c == WHITE) ? 
shift(bb) : shift(bb)) & occupied; - - while (attacks_left) - { - Square to = pop_lsb(attacks_left); - Square from = to - right; - Piece attacked = pos.piece_on(to); - IndexType index = make_index(perspective, attacker, from, to, attacked, ksq); - - if (index < Dimensions) - active.push_back(index); - } - - while (attacks_right) - { - Square to = pop_lsb(attacks_right); - Square from = to - left; - Piece attacked = pos.piece_on(to); - IndexType index = make_index(perspective, attacker, from, to, attacked, ksq); - - if (index < Dimensions) - active.push_back(index); - } - } - else - { - while (bb) - { - Square from = pop_lsb(bb); - Bitboard attacks = (attacks_bb(pt, from, occupied)) & occupied; - - while (attacks) - { - Square to = pop_lsb(attacks); - Piece attacked = pos.piece_on(to); - IndexType index = - make_index(perspective, attacker, from, to, attacked, ksq); - - if (index < Dimensions) - active.push_back(index); - } - } - } - } - } -} - -// Get a list of indices for recently changed features - -void FullThreats::append_changed_indices(Color perspective, - Square ksq, - const DiffType& diff, - IndexList& removed, - IndexList& added, - FusedUpdateData* fusedData, - bool first, - const ThreatWeightType* prefetchBase, - IndexType prefetchStride) { - - for (const auto& dirty : diff.list) - { - auto attacker = dirty.pc(); - auto attacked = dirty.threatened_pc(); - auto from = dirty.pc_sq(); - auto to = dirty.threatened_sq(); - auto add = dirty.add(); - - if (fusedData) - { - if (from == fusedData->dp2removed) - { - if (add) - { - if (first) - { - fusedData->dp2removedOriginBoard |= to; - continue; - } - } - else if (fusedData->dp2removedOriginBoard & to) - continue; - } - - if (to != SQ_NONE && to == fusedData->dp2removed) - { - if (add) - { - if (first) - { - fusedData->dp2removedTargetBoard |= from; - continue; - } - } - else if (fusedData->dp2removedTargetBoard & from) - continue; - } - } - - auto& insert = add ? 
added : removed; - const IndexType index = make_index(perspective, attacker, from, to, attacked, ksq); - - if (index < Dimensions) - { - if (prefetchBase) - prefetch( - prefetchBase + static_cast(index) * prefetchStride); - insert.push_back(index); - } - } -} - -bool FullThreats::requires_refresh(const DiffType& diff, Color perspective) { - return perspective == diff.us && (int8_t(diff.ksq) & 0b100) != (int8_t(diff.prevKsq) & 0b100); -} - -} // namespace Stockfish::Eval::NNUE::Features diff --git a/src/nnue/features/full_threats.h b/src/nnue/features/full_threats.h deleted file mode 100644 index 76f5b74c6f8c5cebf0969497bd096ab96b2ee2e3..0000000000000000000000000000000000000000 --- a/src/nnue/features/full_threats.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -//Definition of input features Simplified_Threats of NNUE evaluation function - -#ifndef NNUE_FEATURES_FULL_THREATS_INCLUDED -#define NNUE_FEATURES_FULL_THREATS_INCLUDED - -#include - -#include "../../misc.h" -#include "../../types.h" -#include "../nnue_common.h" - -namespace Stockfish { -class Position; -} - -namespace Stockfish::Eval::NNUE::Features { - -static constexpr int numValidTargets[PIECE_NB] = {0, 6, 10, 8, 8, 10, 0, 0, - 0, 6, 10, 8, 8, 10, 0, 0}; - -class FullThreats { - public: - // Feature name - static constexpr const char* Name = "Full_Threats(Friend)"; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t HashValue = 0x8f234cb8u; - - // Number of feature dimensions - static constexpr IndexType Dimensions = 60144; - - // clang-format off - // Orient a square according to perspective (rotates by 180 for black) - static constexpr std::int8_t OrientTBL[SQUARE_NB] = { - SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1, - SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1, - SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1, - SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1, - SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1, - SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1, - SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1, - SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1, - }; - - static constexpr int map[PIECE_TYPE_NB-2][PIECE_TYPE_NB-2] = { - { 0, 1, -1, 2, -1, -1}, - { 0, 1, 2, 3, 4, -1}, - { 0, 1, 2, 3, -1, -1}, - { 0, 1, 2, 3, -1, -1}, - { 0, 1, 2, 3, 4, -1}, - {-1, -1, -1, -1, -1, -1} - }; - // clang-format on - - struct FusedUpdateData { - Bitboard dp2removedOriginBoard = 0; - Bitboard dp2removedTargetBoard = 0; - - Square dp2removed; - }; - - // Maximum number of simultaneously active features. 
- static constexpr IndexType MaxActiveDimensions = 128; - using IndexList = ValueList; - using DiffType = DirtyThreats; - - static IndexType - make_index(Color perspective, Piece attkr, Square from, Square to, Piece attkd, Square ksq); - - // Get a list of indices for active features - static void append_active_indices(Color perspective, const Position& pos, IndexList& active); - - // Get a list of indices for recently changed features - static void append_changed_indices(Color perspective, - Square ksq, - const DiffType& diff, - IndexList& removed, - IndexList& added, - FusedUpdateData* fd = nullptr, - bool first = false, - const ThreatWeightType* prefetchBase = nullptr, - IndexType prefetchStride = 0); - - // Returns whether the change stored in this DirtyPiece means - // that a full accumulator refresh is required. - static bool requires_refresh(const DiffType& diff, Color perspective); -}; - -} // namespace Stockfish::Eval::NNUE::Features - -#endif // #ifndef NNUE_FEATURES_FULL_THREATS_INCLUDED diff --git a/src/nnue/features/half_ka_v2_hm.cpp b/src/nnue/features/half_ka_v2_hm.cpp deleted file mode 100644 index a82e89de486812d870d6fafa84f802b68ee0b715..0000000000000000000000000000000000000000 --- a/src/nnue/features/half_ka_v2_hm.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -//Definition of input features HalfKAv2_hm of NNUE evaluation function - -#include "half_ka_v2_hm.h" - -#include "../../bitboard.h" -#include "../../position.h" -#include "../../types.h" -#include "../nnue_common.h" - -namespace Stockfish::Eval::NNUE::Features { - -// Index of a feature for a given king position and another piece on some square - -IndexType HalfKAv2_hm::make_index(Color perspective, Square s, Piece pc, Square ksq) { - const IndexType flip = 56 * perspective; - return (IndexType(s) ^ OrientTBL[ksq] ^ flip) + PieceSquareIndex[perspective][pc] - + KingBuckets[int(ksq) ^ flip]; -} - -// Get a list of indices for active features - -void HalfKAv2_hm::append_active_indices(Color perspective, const Position& pos, IndexList& active) { - Square ksq = pos.square(perspective); - Bitboard bb = pos.pieces(); - while (bb) - { - Square s = pop_lsb(bb); - active.push_back(make_index(perspective, s, pos.piece_on(s), ksq)); - } -} - -// Get a list of indices for recently changed features - -void HalfKAv2_hm::append_changed_indices( - Color perspective, Square ksq, const DiffType& diff, IndexList& removed, IndexList& added) { - removed.push_back(make_index(perspective, diff.from, diff.pc, ksq)); - if (diff.to != SQ_NONE) - added.push_back(make_index(perspective, diff.to, diff.pc, ksq)); - - if (diff.remove_sq != SQ_NONE) - removed.push_back(make_index(perspective, diff.remove_sq, diff.remove_pc, ksq)); - - if (diff.add_sq != SQ_NONE) - added.push_back(make_index(perspective, diff.add_sq, diff.add_pc, ksq)); -} - -bool HalfKAv2_hm::requires_refresh(const DiffType& diff, Color perspective) { - return diff.pc == make_piece(perspective, KING); -} - -} // namespace Stockfish::Eval::NNUE::Features diff --git a/src/nnue/features/half_ka_v2_hm.h b/src/nnue/features/half_ka_v2_hm.h deleted file mode 100644 index 
49b0a87a4d01c705028ad0e991b12a3d24c2954d..0000000000000000000000000000000000000000 --- a/src/nnue/features/half_ka_v2_hm.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -//Definition of input features HalfKP of NNUE evaluation function - -#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED -#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED - -#include - -#include "../../misc.h" -#include "../../types.h" -#include "../nnue_common.h" - -namespace Stockfish { -class Position; -} - -namespace Stockfish::Eval::NNUE::Features { - -// Feature HalfKAv2_hm: Combination of the position of own king and the -// position of pieces. Position mirrored such that king is always on e..h files. 
-class HalfKAv2_hm { - - // Unique number for each piece type on each square - enum { - PS_NONE = 0, - PS_W_PAWN = 0, - PS_B_PAWN = 1 * SQUARE_NB, - PS_W_KNIGHT = 2 * SQUARE_NB, - PS_B_KNIGHT = 3 * SQUARE_NB, - PS_W_BISHOP = 4 * SQUARE_NB, - PS_B_BISHOP = 5 * SQUARE_NB, - PS_W_ROOK = 6 * SQUARE_NB, - PS_B_ROOK = 7 * SQUARE_NB, - PS_W_QUEEN = 8 * SQUARE_NB, - PS_B_QUEEN = 9 * SQUARE_NB, - PS_KING = 10 * SQUARE_NB, - PS_NB = 11 * SQUARE_NB - }; - - static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = { - // Convention: W - us, B - them - // Viewed from other side, W and B are reversed - {PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE, - PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE}, - {PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE, - PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}}; - - public: - // Feature name - static constexpr const char* Name = "HalfKAv2_hm(Friend)"; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t HashValue = 0x7f234cb8u; - - // Number of feature dimensions - static constexpr IndexType Dimensions = - static_cast(SQUARE_NB) * static_cast(PS_NB) / 2; - -#define B(v) (v * PS_NB) - // clang-format off - static constexpr IndexType KingBuckets[SQUARE_NB] = { - B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28), - B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24), - B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20), - B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16), - B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12), - B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8), - B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4), - B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0), - }; - // clang-format on -#undef B - // clang-format off - // Orient a square according to perspective (rotates by 180 for black) - 
static constexpr IndexType OrientTBL[SQUARE_NB] = { - SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, - SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, - SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, - SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, - SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, - SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, - SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, - SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1 , - }; - // clang-format on - - // Maximum number of simultaneously active features. - static constexpr IndexType MaxActiveDimensions = 32; - using IndexList = ValueList; - using DiffType = DirtyPiece; - - // Index of a feature for a given king position and another piece on some square - - static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq); - - // Get a list of indices for active features - - static void append_active_indices(Color perspective, const Position& pos, IndexList& active); - - // Get a list of indices for recently changed features - static void append_changed_indices( - Color perspective, Square ksq, const DiffType& diff, IndexList& removed, IndexList& added); - - // Returns whether the change stored in this DirtyPiece means - // that a full accumulator refresh is required. 
- static bool requires_refresh(const DiffType& diff, Color perspective); -}; - -} // namespace Stockfish::Eval::NNUE::Features - -#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h deleted file mode 100644 index 6cd44e19c82b562515bdcd3a1543d4d698bbec66..0000000000000000000000000000000000000000 --- a/src/nnue/layers/affine_transform.h +++ /dev/null @@ -1,312 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// Definition of layer AffineTransform of NNUE evaluation function - -#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED -#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED - -#include -#include - -#include "../../memory.h" -#include "../nnue_common.h" -#include "../simd.h" - -/* - This file contains the definition for a fully connected layer (aka affine transform). - - - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32. 
- - that's why AVX512 is hard to implement - - expected use-case is small layers - - inputs are processed in chunks of 4, weights are respectively transposed - - accumulation happens directly to int32s -*/ - -namespace Stockfish::Eval::NNUE::Layers { - -#if defined(USE_SSSE3) || defined(USE_NEON_DOTPROD) - #define ENABLE_SEQ_OPT -#endif - -// Fallback implementation for older/other architectures. -// Requires the input to be padded to at least 16 values. -#ifndef ENABLE_SEQ_OPT - -template -static void affine_transform_non_ssse3(std::int32_t* output, - const std::int8_t* weights, - const std::int32_t* biases, - const std::uint8_t* input) { - #if defined(USE_SSE2) || defined(USE_NEON) - #if defined(USE_SSE2) - // At least a multiple of 16, with SSE2. - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; - const __m128i Zeros = _mm_setzero_si128(); - const auto inputVector = reinterpret_cast(input); - - #elif defined(USE_NEON) - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; - const auto inputVector = reinterpret_cast(input); - #endif - - for (IndexType i = 0; i < OutputDimensions; ++i) - { - const IndexType offset = i * PaddedInputDimensions; - - #if defined(USE_SSE2) - __m128i sumLo = _mm_cvtsi32_si128(biases[i]); - __m128i sumHi = Zeros; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) - { - __m128i row_j = _mm_load_si128(&row[j]); - __m128i input_j = _mm_load_si128(&inputVector[j]); - __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); - __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); - __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); - __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); - __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); - __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); - sumLo = _mm_add_epi32(sumLo, productLo); - sumHi = 
_mm_add_epi32(sumHi, productHi); - } - __m128i sum = _mm_add_epi32(sumLo, sumHi); - __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); - sum = _mm_add_epi32(sum, sumHigh_64); - __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); - sum = _mm_add_epi32(sum, sum_second_32); - output[i] = _mm_cvtsi128_si32(sum); - - #elif defined(USE_NEON) - - int32x4_t sum = {biases[i]}; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) - { - int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]); - product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); - sum = vpadalq_s16(sum, product); - } - output[i] = SIMD::neon_m128_reduce_add_epi32(sum); - - #endif - } - #else - std::memcpy(output, biases, sizeof(std::int32_t) * OutputDimensions); - - // Traverse weights in transpose order to take advantage of input sparsity - for (IndexType i = 0; i < InputDimensions; ++i) - if (input[i]) - { - const std::int8_t* w = &weights[i]; - const int in = input[i]; - for (IndexType j = 0; j < OutputDimensions; ++j) - output[j] += w[j * PaddedInputDimensions] * in; - } - #endif -} - -#endif // !ENABLE_SEQ_OPT - -template -class AffineTransform { - public: - // Input/output type - using InputType = std::uint8_t; - using OutputType = std::int32_t; - - // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; - static constexpr IndexType OutputDimensions = OutDims; - - static constexpr IndexType PaddedInputDimensions = - ceil_to_multiple(InputDimensions, MaxSimdWidth); - static constexpr IndexType PaddedOutputDimensions = - ceil_to_multiple(OutputDimensions, MaxSimdWidth); - - using OutputBuffer = OutputType[PaddedOutputDimensions]; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0xCC03DAE4u; - hashValue += OutputDimensions; - hashValue ^= prevHash >> 1; - 
hashValue ^= prevHash << 31; - return hashValue; - } - - static constexpr IndexType get_weight_index_scrambled(IndexType i) { - return (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 - + i / PaddedInputDimensions * 4 + i % 4; - } - - static constexpr IndexType get_weight_index(IndexType i) { -#ifdef ENABLE_SEQ_OPT - return get_weight_index_scrambled(i); -#else - return i; -#endif - } - - // Read network parameters - bool read_parameters(std::istream& stream) { - read_little_endian(stream, biases, OutputDimensions); - for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - weights[get_weight_index(i)] = read_little_endian(stream); - - return !stream.fail(); - } - - // Write network parameters - bool write_parameters(std::ostream& stream) const { - write_little_endian(stream, biases, OutputDimensions); - - for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - write_little_endian(stream, weights[get_weight_index(i)]); - - return !stream.fail(); - } - - std::size_t get_content_hash() const { - std::size_t h = 0; - hash_combine(h, get_raw_data_hash(biases)); - hash_combine(h, get_raw_data_hash(weights)); - hash_combine(h, get_hash_value(0)); - return h; - } - - // Forward propagation - void propagate(const InputType* input, OutputType* output) const { - -#ifdef ENABLE_SEQ_OPT - - if constexpr (OutputDimensions > 1) - { - #if defined(USE_AVX512) - using vec_t = __m512i; - #define vec_set_32 _mm512_set1_epi32 - #define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32 - #elif defined(USE_AVX2) - using vec_t = __m256i; - #define vec_set_32 _mm256_set1_epi32 - #define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32 - #elif defined(USE_SSSE3) - using vec_t = __m128i; - #define vec_set_32 _mm_set1_epi32 - #define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32 - #elif defined(USE_NEON_DOTPROD) - using vec_t = int32x4_t; - #define vec_set_32 vdupq_n_s32 - #define vec_add_dpbusd_32(acc, a, b) \ - 
SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \ - vreinterpretq_s8_s32(b)) - #endif - - static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); - - static_assert(OutputDimensions % OutputSimdWidth == 0); - - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 4; - constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; - - const vec_t* biasvec = reinterpret_cast(biases); - vec_t acc[NumRegs]; - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = biasvec[k]; - - for (IndexType i = 0; i < NumChunks; ++i) - { - const vec_t in0 = - vec_set_32(load_as(input + i * sizeof(std::int32_t))); - const auto col0 = - reinterpret_cast(&weights[i * OutputDimensions * 4]); - - for (IndexType k = 0; k < NumRegs; ++k) - vec_add_dpbusd_32(acc[k], in0, col0[k]); - } - - vec_t* outptr = reinterpret_cast(output); - for (IndexType k = 0; k < NumRegs; ++k) - outptr[k] = acc[k]; - - #undef vec_set_32 - #undef vec_add_dpbusd_32 - } - else if constexpr (OutputDimensions == 1) - { - // We cannot use AVX512 for the last layer because there are only 32 inputs - // and the buffer is not padded to 64 elements. 
- #if defined(USE_AVX2) - using vec_t = __m256i; - #define vec_setzero() _mm256_setzero_si256() - #define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32 - #define vec_hadd SIMD::m256_hadd - #elif defined(USE_SSSE3) - using vec_t = __m128i; - #define vec_setzero() _mm_setzero_si128() - #define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32 - #define vec_hadd SIMD::m128_hadd - #elif defined(USE_NEON_DOTPROD) - using vec_t = int32x4_t; - #define vec_setzero() vdupq_n_s32(0) - #define vec_add_dpbusd_32(acc, a, b) \ - SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \ - vreinterpretq_s8_s32(b)) - #define vec_hadd SIMD::neon_m128_hadd - #endif - - const auto inputVector = reinterpret_cast(input); - - static constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(InputType); - - static_assert(PaddedInputDimensions % InputSimdWidth == 0); - - constexpr IndexType NumChunks = PaddedInputDimensions / InputSimdWidth; - vec_t sum0 = vec_setzero(); - const auto row0 = reinterpret_cast(&weights[0]); - - for (int j = 0; j < int(NumChunks); ++j) - { - const vec_t in = inputVector[j]; - vec_add_dpbusd_32(sum0, in, row0[j]); - } - output[0] = vec_hadd(sum0, biases[0]); - - #undef vec_setzero - #undef vec_add_dpbusd_32 - #undef vec_hadd - } -#else - // Use old implementation for the other architectures. 
- affine_transform_non_ssse3( - output, weights, biases, input); -#endif - } - - private: - using BiasType = OutputType; - using WeightType = std::int8_t; - - alignas(CacheLineSize) BiasType biases[OutputDimensions]; - alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; -}; - -} // namespace Stockfish::Eval::NNUE::Layers - -#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED diff --git a/src/nnue/layers/affine_transform_sparse_input.h b/src/nnue/layers/affine_transform_sparse_input.h deleted file mode 100644 index 059a773974b560ee5712143fe3aa09987eae0fbd..0000000000000000000000000000000000000000 --- a/src/nnue/layers/affine_transform_sparse_input.h +++ /dev/null @@ -1,379 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// Definition of layer AffineTransformSparseInput of NNUE evaluation function - -#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED -#define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED - -#include -#include -#include -#include - -#include "../../bitboard.h" -#include "../../memory.h" -#include "../simd.h" -#include "../nnue_common.h" - -/* - This file contains the definition for a fully connected layer (aka affine transform) with block sparse input. 
-*/ - -namespace Stockfish::Eval::NNUE::Layers { - -#if (USE_SSSE3 | (USE_NEON >= 8)) -static constexpr int lsb_index64[64] = { - 0, 47, 1, 56, 48, 27, 2, 60, 57, 49, 41, 37, 28, 16, 3, 61, 54, 58, 35, 52, 50, 42, - 21, 44, 38, 32, 29, 23, 17, 11, 4, 62, 46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, - 31, 22, 10, 45, 25, 39, 14, 33, 19, 30, 9, 24, 13, 18, 8, 12, 7, 6, 5, 63}; - -constexpr int constexpr_lsb(uint64_t bb) { - assert(bb != 0); - constexpr uint64_t debruijn64 = 0x03F79D71B4CB0A89ULL; - return lsb_index64[((bb ^ (bb - 1)) * debruijn64) >> 58]; -} - -alignas(CacheLineSize) static constexpr struct OffsetIndices { - - std::uint16_t offset_indices[256][8]; - - constexpr OffsetIndices() : - offset_indices() { - for (int i = 0; i < 256; ++i) - { - std::uint64_t j = i, k = 0; - while (j) - { - offset_indices[i][k++] = constexpr_lsb(j); - j &= j - 1; - } - while (k < 8) - offset_indices[i][k++] = 0; - } - } - -} Lookup; - - #if defined(__GNUC__) || defined(__clang__) - #define RESTRICT __restrict__ - #elif defined(_MSC_VER) - #define RESTRICT __restrict - #else - #define RESTRICT - #endif - -// Find indices of nonzero 32-bit values in a packed byte buffer. -// The input pointer addresses a sequence of 32-bit blocks stored in a -// std::uint8_t array. 
-template -void find_nnz(const std::uint8_t* RESTRICT input, - std::uint16_t* RESTRICT out, - IndexType& count_out) { - - #if defined(USE_AVX512ICL) - - constexpr IndexType SimdWidthIn = 64; // 512 bits - constexpr IndexType SimdWidthOut = 32; // 512 bits / 16 bits - constexpr IndexType NumChunks = InputDimensions / SimdWidthOut; - const __m512i increment = _mm512_set1_epi16(SimdWidthOut); - __m512i base = _mm512_set_epi16( // Same permute order as _mm512_packus_epi32() - 31, 30, 29, 28, 15, 14, 13, 12, 27, 26, 25, 24, 11, 10, 9, 8, 23, 22, 21, 20, 7, 6, 5, 4, 19, - 18, 17, 16, 3, 2, 1, 0); - - IndexType count = 0; - for (IndexType i = 0; i < NumChunks; ++i) - { - const __m512i inputV0 = _mm512_load_si512(input + i * 2 * SimdWidthIn); - const __m512i inputV1 = _mm512_load_si512(input + i * 2 * SimdWidthIn + SimdWidthIn); - - // Get a bitmask and gather non zero indices - const __m512i inputV01 = _mm512_packus_epi32(inputV0, inputV1); - const __mmask32 nnzMask = _mm512_test_epi16_mask(inputV01, inputV01); - - // Avoid _mm512_mask_compressstoreu_epi16() as it's 256 uOps on Zen4 - __m512i nnz = _mm512_maskz_compress_epi16(nnzMask, base); - _mm512_storeu_si512(out + count, nnz); - - count += popcount(nnzMask); - base = _mm512_add_epi16(base, increment); - } - count_out = count; - - #elif defined(USE_AVX512) - - constexpr IndexType SimdWidth = 16; // 512 bits / 32 bits - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const __m512i increment = _mm512_set1_epi32(SimdWidth); - __m512i base = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - - IndexType count = 0; - for (IndexType i = 0; i < NumChunks; ++i) - { - const __m512i inputV = _mm512_load_si512(input + i * SimdWidth * sizeof(std::uint32_t)); - - // Get a bitmask and gather non zero indices - const __mmask16 nnzMask = _mm512_test_epi32_mask(inputV, inputV); - const __m512i nnzV = _mm512_maskz_compress_epi32(nnzMask, base); - _mm512_mask_cvtepi32_storeu_epi16(out + count, 
0xFFFF, nnzV); - count += popcount(nnzMask); - base = _mm512_add_epi32(base, increment); - } - count_out = count; - - #else - - using namespace SIMD; - - constexpr IndexType InputSimdWidth = sizeof(vec_uint_t) / sizeof(std::int32_t); - // Outputs are processed 8 elements at a time, even if the SIMD width is narrower - constexpr IndexType ChunkSize = 8; - constexpr IndexType NumChunks = InputDimensions / ChunkSize; - constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth; - - static_assert(InputsPerChunk > 0 && "SIMD width too wide"); - - const auto inputVector = reinterpret_cast(input); - IndexType count = 0; - vec128_t base = vec128_zero; - const vec128_t increment = vec128_set_16(8); - for (IndexType i = 0; i < NumChunks; ++i) - { - // bitmask of nonzero values in this chunk - unsigned nnz = 0; - for (IndexType j = 0; j < InputsPerChunk; ++j) - { - const vec_uint_t inputChunk = inputVector[i * InputsPerChunk + j]; - nnz |= unsigned(vec_nnz(inputChunk)) << (j * InputSimdWidth); - } - const vec128_t offsets = - vec128_load(reinterpret_cast(&Lookup.offset_indices[nnz])); - vec128_storeu(reinterpret_cast(out + count), vec128_add(base, offsets)); - count += popcount(nnz); - base = vec128_add(base, increment); - } - count_out = count; - #endif -} - -#endif - -// Sparse input implementation -template -class AffineTransformSparseInput { - public: - // Input/output type - using InputType = std::uint8_t; - using OutputType = std::int32_t; - - // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; - static constexpr IndexType OutputDimensions = OutDims; - - static_assert(OutputDimensions % 16 == 0, - "Only implemented for OutputDimensions divisible by 16."); - - static constexpr IndexType PaddedInputDimensions = - ceil_to_multiple(InputDimensions, MaxSimdWidth); - static constexpr IndexType PaddedOutputDimensions = - ceil_to_multiple(OutputDimensions, MaxSimdWidth); - -#if (USE_SSSE3 | (USE_NEON >= 8)) - static constexpr 
IndexType ChunkSize = 4; -#else - static constexpr IndexType ChunkSize = 1; -#endif - - using OutputBuffer = OutputType[PaddedOutputDimensions]; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0xCC03DAE4u; - hashValue += OutputDimensions; - hashValue ^= prevHash >> 1; - hashValue ^= prevHash << 31; - return hashValue; - } - - static constexpr IndexType get_weight_index_scrambled(IndexType i) { - return (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize - + i / PaddedInputDimensions * ChunkSize + i % ChunkSize; - } - - static constexpr IndexType get_weight_index(IndexType i) { -#if (USE_SSSE3 | (USE_NEON >= 8)) - return get_weight_index_scrambled(i); -#else - return i; -#endif - } - - // Read network parameters - bool read_parameters(std::istream& stream) { - read_little_endian(stream, biases, OutputDimensions); - for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - weights[get_weight_index(i)] = read_little_endian(stream); - - return !stream.fail(); - } - - // Write network parameters - bool write_parameters(std::ostream& stream) const { - write_little_endian(stream, biases, OutputDimensions); - - for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - write_little_endian(stream, weights[get_weight_index(i)]); - - return !stream.fail(); - } - - std::size_t get_content_hash() const { - std::size_t h = 0; - hash_combine(h, get_raw_data_hash(biases)); - hash_combine(h, get_raw_data_hash(weights)); - hash_combine(h, get_hash_value(0)); - return h; - } - - // Forward propagation - void propagate(const InputType* input, OutputType* output) const { - -#if (USE_SSSE3 | (USE_NEON >= 8)) - #if defined(USE_AVX512) - using invec_t = __m512i; - using outvec_t = __m512i; - #define vec_add_32 _mm512_add_epi32 - #define vec_set_32 _mm512_set1_epi32 - #define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32 - 
#elif defined(USE_AVX2) - using invec_t = __m256i; - using outvec_t = __m256i; - #define vec_add_32 _mm256_add_epi32 - #define vec_set_32 _mm256_set1_epi32 - #define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32 - #elif defined(USE_SSSE3) - using invec_t = __m128i; - using outvec_t = __m128i; - #define vec_set_32 _mm_set1_epi32 - #define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32 - #elif defined(USE_NEON_DOTPROD) - using invec_t = int8x16_t; - using outvec_t = int32x4_t; - #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) - #define vec_add_dpbusd_32 SIMD::dotprod_m128_add_dpbusd_epi32 - #elif defined(USE_NEON) - using invec_t = int8x16_t; - using outvec_t = int32x4_t; - #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) - #define vec_add_dpbusd_32 SIMD::neon_m128_add_dpbusd_epi32 - #endif - constexpr IndexType OutputSimdWidth = sizeof(outvec_t) / sizeof(OutputType); - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / ChunkSize; - constexpr IndexType NumAccums = OutputDimensions / OutputSimdWidth; - // If we're using high-latency dot product instructions, split the accumulators - // to create 3 separate dependency chains and merge at the end - constexpr IndexType NumRegs = - #if defined(USE_VNNI) - 3 * NumAccums; - #else - NumAccums; - #endif - std::uint16_t nnz[NumChunks]; - IndexType count; - - // Find indices of nonzero 32-bit blocks - find_nnz(input, nnz, count); - - const outvec_t* biasvec = reinterpret_cast(biases); - outvec_t acc[NumRegs]; - for (IndexType k = 0; k < NumAccums; ++k) - acc[k] = biasvec[k]; - - const auto* start = nnz; - const auto* end = nnz + count; - - // convince GCC to not do weird pointer arithmetic in the following loop - const std::int8_t* weights_cp = weights; - #if defined(USE_VNNI) - for (IndexType k = NumAccums; k < NumRegs; ++k) - acc[k] = vec_zero(); - - while (start < end - 2) - { - const std::ptrdiff_t i0 = *start++; - const std::ptrdiff_t i1 = *start++; - const std::ptrdiff_t i2 = 
*start++; - const invec_t in0 = - vec_set_32(load_as(input + i0 * sizeof(std::int32_t))); - const invec_t in1 = - vec_set_32(load_as(input + i1 * sizeof(std::int32_t))); - const invec_t in2 = - vec_set_32(load_as(input + i2 * sizeof(std::int32_t))); - const auto col0 = - reinterpret_cast(&weights_cp[i0 * OutputDimensions * ChunkSize]); - const auto col1 = - reinterpret_cast(&weights_cp[i1 * OutputDimensions * ChunkSize]); - const auto col2 = - reinterpret_cast(&weights_cp[i2 * OutputDimensions * ChunkSize]); - for (IndexType k = 0; k < NumAccums; ++k) - { - vec_add_dpbusd_32(acc[k], in0, col0[k]); - vec_add_dpbusd_32(acc[k + NumAccums], in1, col1[k]); - vec_add_dpbusd_32(acc[k + 2 * NumAccums], in2, col2[k]); - } - } - for (IndexType k = 0; k < NumAccums; ++k) - acc[k] = vec_add_32(vec_add_32(acc[k], acc[k + NumAccums]), acc[k + 2 * NumAccums]); - #endif - while (start < end) - { - const std::ptrdiff_t i = *start++; - const invec_t in = vec_set_32(load_as(input + i * sizeof(std::int32_t))); - const auto col = - reinterpret_cast(&weights_cp[i * OutputDimensions * ChunkSize]); - for (IndexType k = 0; k < NumAccums; ++k) - vec_add_dpbusd_32(acc[k], in, col[k]); - } - - outvec_t* outptr = reinterpret_cast(output); - for (IndexType k = 0; k < NumAccums; ++k) - outptr[k] = acc[k]; - - #undef vec_set_32 - #undef vec_add_dpbusd_32 - #ifdef vec_add_32 - #undef vec_add_32 - #endif -#else - // Use dense implementation for the other architectures. 
- affine_transform_non_ssse3( - output, weights, biases, input); -#endif - } - - private: - using BiasType = OutputType; - using WeightType = std::int8_t; - - alignas(CacheLineSize) BiasType biases[OutputDimensions]; - alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; -}; - -} // namespace Stockfish::Eval::NNUE::Layers - -#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h deleted file mode 100644 index 9ce85d3fa79c2be45a41f1eb47e7b737222a5cc5..0000000000000000000000000000000000000000 --- a/src/nnue/layers/clipped_relu.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -// Definition of layer ClippedReLU of NNUE evaluation function - -#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED -#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED - -#include -#include -#include - -#include "../nnue_common.h" - -namespace Stockfish::Eval::NNUE::Layers { - -// Clipped ReLU -template -class ClippedReLU { - public: - // Input/output type - using InputType = std::int32_t; - using OutputType = std::uint8_t; - - // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; - static constexpr IndexType OutputDimensions = InputDimensions; - static constexpr IndexType PaddedOutputDimensions = - ceil_to_multiple(OutputDimensions, 32); - - using OutputBuffer = OutputType[PaddedOutputDimensions]; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0x538D24C7u; - hashValue += prevHash; - return hashValue; - } - - // Read network parameters - bool read_parameters(std::istream&) { return true; } - - // Write network parameters - bool write_parameters(std::ostream&) const { return true; } - - std::size_t get_content_hash() const { - std::size_t h = 0; - hash_combine(h, get_hash_value(0)); - return h; - } - - // Forward propagation - void propagate(const InputType* input, OutputType* output) const { - -#if defined(USE_AVX2) - if constexpr (InputDimensions % SimdWidth == 0) - { - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m256i*>(output); - for (IndexType i = 0; i < NumChunks; ++i) - { - const __m256i words0 = - _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 0]), - _mm256_load_si256(&in[i * 4 + 1])), - WeightScaleBits); - const __m256i words1 = - _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 2]), - _mm256_load_si256(&in[i * 4 + 3])), 
- WeightScaleBits); - _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32( - _mm256_packs_epi16(words0, words1), Offsets)); - } - } - else - { - constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m128i*>(output); - for (IndexType i = 0; i < NumChunks; ++i) - { - const __m128i words0 = _mm_srli_epi16( - _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), - WeightScaleBits); - const __m128i words1 = _mm_srli_epi16( - _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), - WeightScaleBits); - _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); - } - } - constexpr IndexType Start = InputDimensions % SimdWidth == 0 - ? InputDimensions / SimdWidth * SimdWidth - : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2); - -#elif defined(USE_SSE2) - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - - #ifndef USE_SSE41 - const __m128i k0x80s = _mm_set1_epi8(-128); - #endif - - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m128i*>(output); - for (IndexType i = 0; i < NumChunks; ++i) - { - #if defined(USE_SSE41) - const __m128i words0 = _mm_srli_epi16( - _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), - WeightScaleBits); - const __m128i words1 = _mm_srli_epi16( - _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), - WeightScaleBits); - _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); - #else - const __m128i words0 = _mm_srai_epi16( - _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), - WeightScaleBits); - const __m128i words1 = _mm_srai_epi16( - _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), - WeightScaleBits); - const __m128i packedbytes = _mm_packs_epi16(words0, words1); - _mm_store_si128(&out[i], 
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)); - #endif - } - constexpr IndexType Start = NumChunks * SimdWidth; - -#elif defined(USE_NEON) - constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); - const SIMD::vec_i8x8_t Zero = {0}; - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast(output); - for (IndexType i = 0; i < NumChunks; ++i) - { - int16x8_t shifted; - const auto pack = reinterpret_cast(&shifted); - pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits); - pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits); - out[i] = vmax_s8(vqmovn_s16(shifted), Zero); - } - constexpr IndexType Start = NumChunks * (SimdWidth / 2); -#else - constexpr IndexType Start = 0; -#endif - - for (IndexType i = Start; i < InputDimensions; ++i) - { - output[i] = static_cast(std::clamp(input[i] >> WeightScaleBits, 0, 127)); - } - } -}; - -} // namespace Stockfish::Eval::NNUE::Layers - -#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED diff --git a/src/nnue/layers/sqr_clipped_relu.h b/src/nnue/layers/sqr_clipped_relu.h deleted file mode 100644 index 53412d014a42fc4d45c79f4f84e93f3efcbfb965..0000000000000000000000000000000000000000 --- a/src/nnue/layers/sqr_clipped_relu.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -// Definition of layer ClippedReLU of NNUE evaluation function - -#ifndef NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED -#define NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED - -#include -#include -#include - -#include "../nnue_common.h" - -namespace Stockfish::Eval::NNUE::Layers { - -// Clipped ReLU -template -class SqrClippedReLU { - public: - // Input/output type - using InputType = std::int32_t; - using OutputType = std::uint8_t; - - // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; - static constexpr IndexType OutputDimensions = InputDimensions; - static constexpr IndexType PaddedOutputDimensions = - ceil_to_multiple(OutputDimensions, 32); - - using OutputBuffer = OutputType[PaddedOutputDimensions]; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0x538D24C7u; - hashValue += prevHash; - return hashValue; - } - - // Read network parameters - bool read_parameters(std::istream&) { return true; } - - // Write network parameters - bool write_parameters(std::ostream&) const { return true; } - - std::size_t get_content_hash() const { - std::size_t h = 0; - hash_combine(h, get_hash_value(0)); - return h; - } - - // Forward propagation - void propagate(const InputType* input, OutputType* output) const { - -#if defined(USE_SSE2) - constexpr IndexType NumChunks = InputDimensions / 16; - - static_assert(WeightScaleBits == 6); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m128i*>(output); - for (IndexType i = 0; i < NumChunks; ++i) - { - __m128i words0 = - _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])); - __m128i words1 = - _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])); - - // We shift by WeightScaleBits * 2 = 12 and divide by 128 - // which is an additional shift-right of 7, meaning 19 in total. 
- // MulHi strips the lower 16 bits so we need to shift out 3 more to match. - words0 = _mm_srli_epi16(_mm_mulhi_epi16(words0, words0), 3); - words1 = _mm_srli_epi16(_mm_mulhi_epi16(words1, words1), 3); - - _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); - } - constexpr IndexType Start = NumChunks * 16; - -#else - constexpr IndexType Start = 0; -#endif - - for (IndexType i = Start; i < InputDimensions; ++i) - { - output[i] = static_cast( - // Really should be /127 but we need to make it fast so we right-shift - // by an extra 7 bits instead. Needs to be accounted for in the trainer. - std::min(127ll, ((long long) (input[i]) * input[i]) >> (2 * WeightScaleBits + 7))); - } - } -}; - -} // namespace Stockfish::Eval::NNUE::Layers - -#endif // NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp deleted file mode 100644 index 50722d62436b9f29daa134825fba2d6cd98cc8ad..0000000000000000000000000000000000000000 --- a/src/nnue/network.cpp +++ /dev/null @@ -1,416 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "network.h" - -#include -#include -#include -#include -#include -#include - -#define INCBIN_SILENCE_BITCODE_WARNING -#include "../incbin/incbin.h" - -#include "../evaluate.h" -#include "../misc.h" -#include "../position.h" -#include "../types.h" -#include "nnue_architecture.h" -#include "nnue_common.h" -#include "nnue_misc.h" - -// Macro to embed the default efficiently updatable neural network (NNUE) file -// data in the engine binary (using incbin.h, by Dale Weiler). -// This macro invocation will declare the following three variables -// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data -// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end -// const unsigned int gEmbeddedNNUESize; // the size of the embedded file -// Note that this does not work in Microsoft Visual Studio. -#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) -INCBIN(EmbeddedNNUEBig, EvalFileDefaultNameBig); -INCBIN(EmbeddedNNUESmall, EvalFileDefaultNameSmall); -#else -const unsigned char gEmbeddedNNUEBigData[1] = {0x0}; -const unsigned char* const gEmbeddedNNUEBigEnd = &gEmbeddedNNUEBigData[1]; -const unsigned int gEmbeddedNNUEBigSize = 1; -const unsigned char gEmbeddedNNUESmallData[1] = {0x0}; -const unsigned char* const gEmbeddedNNUESmallEnd = &gEmbeddedNNUESmallData[1]; -const unsigned int gEmbeddedNNUESmallSize = 1; -#endif - -namespace { - -struct EmbeddedNNUE { - EmbeddedNNUE(const unsigned char* embeddedData, - const unsigned char* embeddedEnd, - const unsigned int embeddedSize) : - data(embeddedData), - end(embeddedEnd), - size(embeddedSize) {} - const unsigned char* data; - const unsigned char* end; - const unsigned int size; -}; - -using namespace Stockfish::Eval::NNUE; - -EmbeddedNNUE get_embedded(EmbeddedNNUEType type) { - if (type == EmbeddedNNUEType::BIG) - return EmbeddedNNUE(gEmbeddedNNUEBigData, gEmbeddedNNUEBigEnd, gEmbeddedNNUEBigSize); - else - return EmbeddedNNUE(gEmbeddedNNUESmallData, gEmbeddedNNUESmallEnd, 
gEmbeddedNNUESmallSize); -} - -} - - -namespace Stockfish::Eval::NNUE { - - -namespace Detail { - -// Read evaluation function parameters -template -bool read_parameters(std::istream& stream, T& reference) { - - std::uint32_t header; - header = read_little_endian(stream); - if (!stream || (false && header != T::get_hash_value())) - return false; - return reference.read_parameters(stream); -} - -// Write evaluation function parameters -template -bool write_parameters(std::ostream& stream, const T& reference) { - - write_little_endian(stream, T::get_hash_value()); - return reference.write_parameters(stream); -} - -} // namespace Detail - -template -void Network::load(const std::string& rootDirectory, std::string evalfilePath) { -#if defined(DEFAULT_NNUE_DIRECTORY) - std::vector dirs = {"", "", rootDirectory, - stringify(DEFAULT_NNUE_DIRECTORY)}; -#else - std::vector dirs = {"", "", rootDirectory}; -#endif - - if (evalfilePath.empty()) - evalfilePath = evalFile.defaultName; - - for (const auto& directory : dirs) - { - if (std::string(evalFile.current) != evalfilePath) - { - if (directory != "") - { - load_user_net(directory, evalfilePath); - } - - if (directory == "" && evalfilePath == std::string(evalFile.defaultName)) - { - load_internal(); - } - } - } -} - - -template -bool Network::save(const std::optional& filename) const { - std::string actualFilename; - std::string msg; - - if (filename.has_value()) - actualFilename = filename.value(); - else - { - if (std::string(evalFile.current) != std::string(evalFile.defaultName)) - { - msg = "Failed to export a net. " - "A non-embedded net can only be saved if the filename is specified"; - - sync_cout << msg << sync_endl; - return false; - } - - actualFilename = evalFile.defaultName; - } - - std::ofstream stream(actualFilename, std::ios_base::binary); - bool saved = save(stream, evalFile.current, evalFile.netDescription); - - msg = saved ? 
"Network saved successfully to " + actualFilename : "Failed to export a net"; - - sync_cout << msg << sync_endl; - return saved; -} - - -template -NetworkOutput -Network::evaluate(const Position& pos, - AccumulatorStack& accumulatorStack, - AccumulatorCaches::Cache& cache) const { - - constexpr uint64_t alignment = CacheLineSize; - - alignas(alignment) - TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; - - ASSERT_ALIGNED(transformedFeatures, alignment); - - const int bucket = (pos.count() - 1) / 4; - const auto psqt = - featureTransformer.transform(pos, accumulatorStack, cache, transformedFeatures, bucket); - const auto positional = network[bucket].propagate(transformedFeatures); - return {static_cast(psqt / OutputScale), static_cast(positional / OutputScale)}; -} - - -template -void Network::verify(std::string evalfilePath, - const std::function& f) const { - if (evalfilePath.empty()) - evalfilePath = evalFile.defaultName; - - if (std::string(evalFile.current) != evalfilePath) - { - if (f) - { - std::string msg1 = - "Network evaluation parameters compatible with the engine must be available."; - std::string msg2 = "The network file " + evalfilePath + " was not loaded successfully."; - std::string msg3 = "The UCI option EvalFile might need to specify the full path, " - "including the directory name, to the network file."; - std::string msg4 = "The default net can be downloaded from: " - "https://tests.stockfishchess.org/api/nn/" - + std::string(evalFile.defaultName); - std::string msg5 = "The engine will be terminated now."; - - std::string msg = "ERROR: " + msg1 + '\n' + "ERROR: " + msg2 + '\n' + "ERROR: " + msg3 - + '\n' + "ERROR: " + msg4 + '\n' + "ERROR: " + msg5 + '\n'; - - f(msg); - } - - exit(EXIT_FAILURE); - } - - if (f) - { - size_t size = sizeof(featureTransformer) + sizeof(Arch) * LayerStacks; - f("NNUE evaluation using " + evalfilePath + " (" + std::to_string(size / (1024 * 1024)) - + "MiB, (" + 
std::to_string(featureTransformer.TotalInputDimensions) + ", " - + std::to_string(network[0].TransformedFeatureDimensions) + ", " - + std::to_string(network[0].FC_0_OUTPUTS) + ", " + std::to_string(network[0].FC_1_OUTPUTS) - + ", 1))"); - } -} - - -template -NnueEvalTrace -Network::trace_evaluate(const Position& pos, - AccumulatorStack& accumulatorStack, - AccumulatorCaches::Cache& cache) const { - - constexpr uint64_t alignment = CacheLineSize; - - alignas(alignment) - TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; - - ASSERT_ALIGNED(transformedFeatures, alignment); - - NnueEvalTrace t{}; - t.correctBucket = (pos.count() - 1) / 4; - for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) - { - const auto materialist = - featureTransformer.transform(pos, accumulatorStack, cache, transformedFeatures, bucket); - const auto positional = network[bucket].propagate(transformedFeatures); - - t.psqt[bucket] = static_cast(materialist / OutputScale); - t.positional[bucket] = static_cast(positional / OutputScale); - } - - return t; -} - - -template -void Network::load_user_net(const std::string& dir, - const std::string& evalfilePath) { - std::ifstream stream(dir + evalfilePath, std::ios::binary); - auto description = load(stream); - - if (description.has_value()) - { - evalFile.current = evalfilePath; - evalFile.netDescription = description.value(); - } -} - - -template -void Network::load_internal() { - // C++ way to prepare a buffer for a memory stream - class MemoryBuffer: public std::basic_streambuf { - public: - MemoryBuffer(char* p, size_t n) { - setg(p, p, p + n); - setp(p, p + n); - } - }; - - const auto embedded = get_embedded(embeddedType); - - MemoryBuffer buffer(const_cast(reinterpret_cast(embedded.data)), - size_t(embedded.size)); - - std::istream stream(&buffer); - auto description = load(stream); - - if (description.has_value()) - { - evalFile.current = evalFile.defaultName; - evalFile.netDescription = description.value(); - } 
-} - - -template -void Network::initialize() { - initialized = true; -} - - -template -bool Network::save(std::ostream& stream, - const std::string& name, - const std::string& netDescription) const { - if (name.empty() || name == "None") - return false; - - return write_parameters(stream, netDescription); -} - - -template -std::optional Network::load(std::istream& stream) { - initialize(); - std::string description; - - return read_parameters(stream, description) ? std::make_optional(description) : std::nullopt; -} - - -template -std::size_t Network::get_content_hash() const { - if (!initialized) - return 0; - - std::size_t h = 0; - hash_combine(h, featureTransformer); - for (auto&& layerstack : network) - hash_combine(h, layerstack); - hash_combine(h, evalFile); - hash_combine(h, static_cast(embeddedType)); - return h; -} - -// Read network header -template -bool Network::read_header(std::istream& stream, - std::uint32_t* hashValue, - std::string* desc) const { - std::uint32_t version, size; - - version = read_little_endian(stream); - *hashValue = read_little_endian(stream); - size = read_little_endian(stream); - if (!stream || version != Version) - return false; - desc->resize(size); - stream.read(&(*desc)[0], size); - return !stream.fail(); -} - - -// Write network header -template -bool Network::write_header(std::ostream& stream, - std::uint32_t hashValue, - const std::string& desc) const { - write_little_endian(stream, Version); - write_little_endian(stream, hashValue); - write_little_endian(stream, std::uint32_t(desc.size())); - stream.write(&desc[0], desc.size()); - return !stream.fail(); -} - - -template -bool Network::read_parameters(std::istream& stream, - std::string& netDescription) { - std::uint32_t hashValue; - if (!read_header(stream, &hashValue, &netDescription)) - return false; - if (false && hashValue != Network::hash) - return false; - if (!Detail::read_parameters(stream, featureTransformer)) - return false; - for (std::size_t i = 0; i < 
LayerStacks; ++i) - { - if (!Detail::read_parameters(stream, network[i])) - return false; - } - return stream && stream.peek() == std::ios::traits_type::eof(); -} - - -template -bool Network::write_parameters(std::ostream& stream, - const std::string& netDescription) const { - if (!write_header(stream, Network::hash, netDescription)) - return false; - if (!Detail::write_parameters(stream, featureTransformer)) - return false; - for (std::size_t i = 0; i < LayerStacks; ++i) - { - if (!Detail::write_parameters(stream, network[i])) - return false; - } - return bool(stream); -} - -// Explicit template instantiations - -template class Network, - FeatureTransformer>; - -template class Network, - FeatureTransformer>; - -} // namespace Stockfish::Eval::NNUE diff --git a/src/nnue/network.h b/src/nnue/network.h deleted file mode 100644 index cb433718d43f1f9c25b4ffb6dc9005dcd465b32d..0000000000000000000000000000000000000000 --- a/src/nnue/network.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef NETWORK_H_INCLUDED -#define NETWORK_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../misc.h" -#include "../types.h" -#include "nnue_accumulator.h" -#include "nnue_architecture.h" -#include "nnue_common.h" -#include "nnue_feature_transformer.h" -#include "nnue_misc.h" - -namespace Stockfish { -class Position; -} - -namespace Stockfish::Eval::NNUE { - -enum class EmbeddedNNUEType { - BIG, - SMALL, -}; - -using NetworkOutput = std::tuple; - -// The network must be a trivial type, i.e. the memory must be in-line. -// This is required to allow sharing the network via shared memory, as -// there is no way to run destructors. -template -class Network { - static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions; - - public: - Network(EvalFile file, EmbeddedNNUEType type) : - evalFile(file), - embeddedType(type) {} - - Network(const Network& other) = default; - Network(Network&& other) = default; - - Network& operator=(const Network& other) = default; - Network& operator=(Network&& other) = default; - - void load(const std::string& rootDirectory, std::string evalfilePath); - bool save(const std::optional& filename) const; - - std::size_t get_content_hash() const; - - NetworkOutput evaluate(const Position& pos, - AccumulatorStack& accumulatorStack, - AccumulatorCaches::Cache& cache) const; - - - void verify(std::string evalfilePath, const std::function&) const; - NnueEvalTrace trace_evaluate(const Position& pos, - AccumulatorStack& accumulatorStack, - AccumulatorCaches::Cache& cache) const; - - private: - void load_user_net(const std::string&, const std::string&); - void load_internal(); - - void initialize(); - - bool save(std::ostream&, const std::string&, const std::string&) const; - std::optional load(std::istream&); - - bool read_header(std::istream&, std::uint32_t*, std::string*) const; - bool write_header(std::ostream&, std::uint32_t, const std::string&) const; - - 
bool read_parameters(std::istream&, std::string&); - bool write_parameters(std::ostream&, const std::string&) const; - - // Input feature converter - Transformer featureTransformer; - - // Evaluation function - Arch network[LayerStacks]; - - EvalFile evalFile; - EmbeddedNNUEType embeddedType; - - bool initialized = false; - - // Hash value of evaluation function structure - static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value(); - - template - friend struct AccumulatorCaches::Cache; -}; - -// Definitions of the network types -using SmallFeatureTransformer = FeatureTransformer; -using SmallNetworkArchitecture = - NetworkArchitecture; - -using BigFeatureTransformer = FeatureTransformer; -using BigNetworkArchitecture = NetworkArchitecture; - -using NetworkBig = Network; -using NetworkSmall = Network; - - -struct Networks { - Networks(EvalFile bigFile, EvalFile smallFile) : - big(bigFile, EmbeddedNNUEType::BIG), - small(smallFile, EmbeddedNNUEType::SMALL) {} - - NetworkBig big; - NetworkSmall small; -}; - - -} // namespace Stockfish - -template -struct std::hash> { - std::size_t operator()( - const Stockfish::Eval::NNUE::Network& network) const noexcept { - return network.get_content_hash(); - } -}; - -template<> -struct std::hash { - std::size_t operator()(const Stockfish::Eval::NNUE::Networks& networks) const noexcept { - std::size_t h = 0; - Stockfish::hash_combine(h, networks.big); - Stockfish::hash_combine(h, networks.small); - return h; - } -}; - -#endif diff --git a/src/nnue/nnue_accumulator.cpp b/src/nnue/nnue_accumulator.cpp deleted file mode 100644 index 3f588e37d2bbdacf91b8a04068c019520bed24e5..0000000000000000000000000000000000000000 --- a/src/nnue/nnue_accumulator.cpp +++ /dev/null @@ -1,952 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under 
the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "nnue_accumulator.h" - -#include -#include -#include -#include - -#include "../bitboard.h" -#include "../misc.h" -#include "../position.h" -#include "../types.h" -#include "features/half_ka_v2_hm.h" -#include "nnue_architecture.h" -#include "nnue_common.h" -#include "nnue_feature_transformer.h" // IWYU pragma: keep -#include "simd.h" - -namespace Stockfish::Eval::NNUE { - -using namespace SIMD; - -namespace { - -template -void double_inc_update(Color perspective, - const FeatureTransformer& featureTransformer, - const Square ksq, - AccumulatorState& middle_state, - AccumulatorState& target_state, - const AccumulatorState& computed); - -template -void double_inc_update(Color perspective, - const FeatureTransformer& featureTransformer, - const Square ksq, - AccumulatorState& middle_state, - AccumulatorState& target_state, - const AccumulatorState& computed, - const DirtyPiece& dp2); - -template -void update_accumulator_incremental( - Color perspective, - const FeatureTransformer& featureTransformer, - const Square ksq, - AccumulatorState& target_state, - const AccumulatorState& computed); - -template -void update_accumulator_refresh_cache(Color perspective, - const FeatureTransformer& featureTransformer, - const Position& pos, - AccumulatorState& accumulatorState, - AccumulatorCaches::Cache& cache); - -template -void update_threats_accumulator_full(Color perspective, - const FeatureTransformer& featureTransformer, - const Position& pos, - 
AccumulatorState& accumulatorState); -} - -template -const AccumulatorState& AccumulatorStack::latest() const noexcept { - return accumulators()[size - 1]; -} - -// Explicit template instantiations -template const AccumulatorState& AccumulatorStack::latest() const noexcept; -template const AccumulatorState& AccumulatorStack::latest() const noexcept; - -template -AccumulatorState& AccumulatorStack::mut_latest() noexcept { - return mut_accumulators()[size - 1]; -} - -template -const std::array, AccumulatorStack::MaxSize>& -AccumulatorStack::accumulators() const noexcept { - static_assert(std::is_same_v || std::is_same_v, - "Invalid Feature Set Type"); - - if constexpr (std::is_same_v) - return psq_accumulators; - - if constexpr (std::is_same_v) - return threat_accumulators; -} - -template -std::array, AccumulatorStack::MaxSize>& -AccumulatorStack::mut_accumulators() noexcept { - static_assert(std::is_same_v || std::is_same_v, - "Invalid Feature Set Type"); - - if constexpr (std::is_same_v) - return psq_accumulators; - - if constexpr (std::is_same_v) - return threat_accumulators; -} - -void AccumulatorStack::reset() noexcept { - psq_accumulators[0].reset({}); - threat_accumulators[0].reset({}); - size = 1; -} - -std::pair AccumulatorStack::push() noexcept { - assert(size < MaxSize); - auto& dp = psq_accumulators[size].reset(); - auto& dts = threat_accumulators[size].reset(); - new (&dts) DirtyThreats; - size++; - return {dp, dts}; -} - -void AccumulatorStack::pop() noexcept { - assert(size > 1); - size--; -} - -template -void AccumulatorStack::evaluate(const Position& pos, - const FeatureTransformer& featureTransformer, - AccumulatorCaches::Cache& cache) noexcept { - constexpr bool UseThreats = (Dimensions == TransformedFeatureDimensionsBig); - - evaluate_side(WHITE, pos, featureTransformer, cache); - - if (UseThreats) - evaluate_side(WHITE, pos, featureTransformer, cache); - - evaluate_side(BLACK, pos, featureTransformer, cache); - - if (UseThreats) - 
evaluate_side(BLACK, pos, featureTransformer, cache); -} - -template -void AccumulatorStack::evaluate_side(Color perspective, - const Position& pos, - const FeatureTransformer& featureTransformer, - AccumulatorCaches::Cache& cache) noexcept { - - const auto last_usable_accum = - find_last_usable_accumulator(perspective); - - if ((accumulators()[last_usable_accum].template acc()) - .computed[perspective]) - forward_update_incremental(perspective, pos, featureTransformer, - last_usable_accum); - - else - { - if constexpr (std::is_same_v) - update_accumulator_refresh_cache(perspective, featureTransformer, pos, - mut_latest(), cache); - else - update_threats_accumulator_full(perspective, featureTransformer, pos, - mut_latest()); - - backward_update_incremental(perspective, pos, featureTransformer, - last_usable_accum); - } -} - -// Find the earliest usable accumulator, this can either be a computed accumulator or the accumulator -// state just before a change that requires full refresh. -template -std::size_t AccumulatorStack::find_last_usable_accumulator(Color perspective) const noexcept { - - for (std::size_t curr_idx = size - 1; curr_idx > 0; curr_idx--) - { - if ((accumulators()[curr_idx].template acc()).computed[perspective]) - return curr_idx; - - if (FeatureSet::requires_refresh(accumulators()[curr_idx].diff, perspective)) - return curr_idx; - } - - return 0; -} - -template -void AccumulatorStack::forward_update_incremental( - Color perspective, - const Position& pos, - const FeatureTransformer& featureTransformer, - const std::size_t begin) noexcept { - - assert(begin < accumulators().size()); - assert((accumulators()[begin].template acc()).computed[perspective]); - - const Square ksq = pos.square(perspective); - - for (std::size_t next = begin + 1; next < size; next++) - { - if (next + 1 < size) - { - DirtyPiece& dp1 = mut_accumulators()[next].diff; - DirtyPiece& dp2 = mut_accumulators()[next + 1].diff; - - auto& accumulators = mut_accumulators(); - - if 
constexpr (std::is_same_v) - { - if (dp2.remove_sq != SQ_NONE - && (accumulators[next].diff.threateningSqs & square_bb(dp2.remove_sq))) - { - double_inc_update(perspective, featureTransformer, ksq, accumulators[next], - accumulators[next + 1], accumulators[next - 1], dp2); - next++; - continue; - } - } - - if constexpr (std::is_same_v) - { - if (dp1.to != SQ_NONE && dp1.to == dp2.remove_sq) - { - const Square captureSq = dp1.to; - dp1.to = dp2.remove_sq = SQ_NONE; - double_inc_update(perspective, featureTransformer, ksq, accumulators[next], - accumulators[next + 1], accumulators[next - 1]); - dp1.to = dp2.remove_sq = captureSq; - next++; - continue; - } - } - } - - update_accumulator_incremental(perspective, featureTransformer, ksq, - mut_accumulators()[next], - accumulators()[next - 1]); - } - - assert((latest().acc()).computed[perspective]); -} - -template -void AccumulatorStack::backward_update_incremental( - Color perspective, - - const Position& pos, - const FeatureTransformer& featureTransformer, - const std::size_t end) noexcept { - - assert(end < accumulators().size()); - assert(end < size); - assert((latest().template acc()).computed[perspective]); - - const Square ksq = pos.square(perspective); - - for (std::int64_t next = std::int64_t(size) - 2; next >= std::int64_t(end); next--) - update_accumulator_incremental(perspective, featureTransformer, ksq, - mut_accumulators()[next], - accumulators()[next + 1]); - - assert((accumulators()[end].template acc()).computed[perspective]); -} - -// Explicit template instantiations -template void AccumulatorStack::evaluate( - const Position& pos, - const FeatureTransformer& featureTransformer, - AccumulatorCaches::Cache& cache) noexcept; -template void AccumulatorStack::evaluate( - const Position& pos, - const FeatureTransformer& featureTransformer, - AccumulatorCaches::Cache& cache) noexcept; - - -namespace { - -template, bool> = true> -void fused_row_reduce(const ElementType* in, ElementType* out, const Ts* const... 
rows) { - constexpr IndexType size = Width * sizeof(ElementType) / sizeof(typename VectorWrapper::type); - - auto* vecIn = reinterpret_cast(in); - auto* vecOut = reinterpret_cast(out); - - for (IndexType i = 0; i < size; ++i) - vecOut[i] = fused( - vecIn[i], reinterpret_cast(rows)[i]...); -} - -template -struct AccumulatorUpdateContext { - Color perspective; - const FeatureTransformer& featureTransformer; - const AccumulatorState& from; - AccumulatorState& to; - - AccumulatorUpdateContext(Color persp, - const FeatureTransformer& ft, - const AccumulatorState& accF, - AccumulatorState& accT) noexcept : - perspective{persp}, - featureTransformer{ft}, - from{accF}, - to{accT} {} - - template, bool> = true> - void apply(const Ts... indices) { - auto to_weight_vector = [&](const IndexType index) { - return &featureTransformer.weights[index * Dimensions]; - }; - - auto to_psqt_weight_vector = [&](const IndexType index) { - return &featureTransformer.psqtWeights[index * PSQTBuckets]; - }; - - fused_row_reduce( - (from.template acc()).accumulation[perspective].data(), - (to.template acc()).accumulation[perspective].data(), - to_weight_vector(indices)...); - - fused_row_reduce( - (from.template acc()).psqtAccumulation[perspective].data(), - (to.template acc()).psqtAccumulation[perspective].data(), - to_psqt_weight_vector(indices)...); - } - - void apply(const typename FeatureSet::IndexList& added, - const typename FeatureSet::IndexList& removed) { - const auto& fromAcc = from.template acc().accumulation[perspective]; - auto& toAcc = to.template acc().accumulation[perspective]; - - const auto& fromPsqtAcc = from.template acc().psqtAccumulation[perspective]; - auto& toPsqtAcc = to.template acc().psqtAccumulation[perspective]; - -#ifdef VECTOR - using Tiling = SIMDTiling; - vec_t acc[Tiling::NumRegs]; - psqt_vec_t psqt[Tiling::NumPsqtRegs]; - - const auto* threatWeights = &featureTransformer.threatWeights[0]; - - for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j) - 
{ - auto* fromTile = reinterpret_cast(&fromAcc[j * Tiling::TileHeight]); - auto* toTile = reinterpret_cast(&toAcc[j * Tiling::TileHeight]); - - for (IndexType k = 0; k < Tiling::NumRegs; ++k) - acc[k] = fromTile[k]; - - for (int i = 0; i < removed.ssize(); ++i) - { - size_t index = removed[i]; - const size_t offset = Dimensions * index; - auto* column = reinterpret_cast(&threatWeights[offset]); - - #ifdef USE_NEON - for (IndexType k = 0; k < Tiling::NumRegs; k += 2) - { - acc[k] = vec_sub_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2]))); - acc[k + 1] = vec_sub_16(acc[k + 1], vmovl_high_s8(column[k / 2])); - } - #else - for (IndexType k = 0; k < Tiling::NumRegs; ++k) - acc[k] = vec_sub_16(acc[k], vec_convert_8_16(column[k])); - #endif - } - - for (int i = 0; i < added.ssize(); ++i) - { - size_t index = added[i]; - const size_t offset = Dimensions * index; - auto* column = reinterpret_cast(&threatWeights[offset]); - - #ifdef USE_NEON - for (IndexType k = 0; k < Tiling::NumRegs; k += 2) - { - acc[k] = vec_add_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2]))); - acc[k + 1] = vec_add_16(acc[k + 1], vmovl_high_s8(column[k / 2])); - } - #else - for (IndexType k = 0; k < Tiling::NumRegs; ++k) - acc[k] = vec_add_16(acc[k], vec_convert_8_16(column[k])); - #endif - } - - for (IndexType k = 0; k < Tiling::NumRegs; k++) - vec_store(&toTile[k], acc[k]); - - threatWeights += Tiling::TileHeight; - } - - for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j) - { - auto* fromTilePsqt = - reinterpret_cast(&fromPsqtAcc[j * Tiling::PsqtTileHeight]); - auto* toTilePsqt = - reinterpret_cast(&toPsqtAcc[j * Tiling::PsqtTileHeight]); - - for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) - psqt[k] = fromTilePsqt[k]; - - for (int i = 0; i < removed.ssize(); ++i) - { - size_t index = removed[i]; - const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight; - auto* columnPsqt = reinterpret_cast( - &featureTransformer.threatPsqtWeights[offset]); - - for (std::size_t k 
= 0; k < Tiling::NumPsqtRegs; ++k) - psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); - } - - for (int i = 0; i < added.ssize(); ++i) - { - size_t index = added[i]; - const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight; - auto* columnPsqt = reinterpret_cast( - &featureTransformer.threatPsqtWeights[offset]); - - for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - - for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) - vec_store_psqt(&toTilePsqt[k], psqt[k]); - } - -#else - - toAcc = fromAcc; - toPsqtAcc = fromPsqtAcc; - - for (const auto index : removed) - { - const IndexType offset = Dimensions * index; - - for (IndexType j = 0; j < Dimensions; ++j) - toAcc[j] -= featureTransformer.threatWeights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - toPsqtAcc[k] -= featureTransformer.threatPsqtWeights[index * PSQTBuckets + k]; - } - - for (const auto index : added) - { - const IndexType offset = Dimensions * index; - - for (IndexType j = 0; j < Dimensions; ++j) - toAcc[j] += featureTransformer.threatWeights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - toPsqtAcc[k] += featureTransformer.threatPsqtWeights[index * PSQTBuckets + k]; - } - -#endif - } -}; - -template -auto make_accumulator_update_context(Color perspective, - const FeatureTransformer& featureTransformer, - const AccumulatorState& accumulatorFrom, - AccumulatorState& accumulatorTo) noexcept { - return AccumulatorUpdateContext{perspective, featureTransformer, - accumulatorFrom, accumulatorTo}; -} - -template -void double_inc_update(Color perspective, - const FeatureTransformer& featureTransformer, - const Square ksq, - AccumulatorState& middle_state, - AccumulatorState& target_state, - const AccumulatorState& computed) { - - assert(computed.acc().computed[perspective]); - assert(!middle_state.acc().computed[perspective]); - assert(!target_state.acc().computed[perspective]); - - 
PSQFeatureSet::IndexList removed, added; - PSQFeatureSet::append_changed_indices(perspective, ksq, middle_state.diff, removed, added); - // you can't capture a piece that was just involved in castling since the rook ends up - // in a square that the king passed - assert(added.size() < 2); - PSQFeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added); - - [[maybe_unused]] const int addedSize = added.ssize(); - [[maybe_unused]] const int removedSize = removed.ssize(); - - assert(addedSize == 1); - assert(removedSize == 2 || removedSize == 3); - - // Workaround compiler warning for uninitialized variables, replicated on - // profile builds on windows with gcc 14.2.0. - // Also helps with optimizations on some compilers. - - sf_assume(addedSize == 1); - sf_assume(removedSize == 2 || removedSize == 3); - - auto updateContext = - make_accumulator_update_context(perspective, featureTransformer, computed, target_state); - - if (removedSize == 2) - { - updateContext.template apply(added[0], removed[0], removed[1]); - } - else - { - updateContext.template apply(added[0], removed[0], removed[1], - removed[2]); - } - - target_state.acc().computed[perspective] = true; -} - -template -void double_inc_update(Color perspective, - const FeatureTransformer& featureTransformer, - const Square ksq, - AccumulatorState& middle_state, - AccumulatorState& target_state, - const AccumulatorState& computed, - const DirtyPiece& dp2) { - - assert(computed.acc().computed[perspective]); - assert(!middle_state.acc().computed[perspective]); - assert(!target_state.acc().computed[perspective]); - - ThreatFeatureSet::FusedUpdateData fusedData; - - fusedData.dp2removed = dp2.remove_sq; - - ThreatFeatureSet::IndexList removed, added; - const auto* pfBase = &featureTransformer.threatWeights[0]; - auto pfStride = static_cast(TransformedFeatureDimensions); - ThreatFeatureSet::append_changed_indices(perspective, ksq, middle_state.diff, removed, added, - &fusedData, true, 
pfBase, pfStride); - ThreatFeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added, - &fusedData, false, pfBase, pfStride); - - auto updateContext = - make_accumulator_update_context(perspective, featureTransformer, computed, target_state); - - updateContext.apply(added, removed); - - target_state.acc().computed[perspective] = true; -} - -template -void update_accumulator_incremental( - Color perspective, - const FeatureTransformer& featureTransformer, - const Square ksq, - AccumulatorState& target_state, - const AccumulatorState& computed) { - - assert((computed.template acc()).computed[perspective]); - assert(!(target_state.template acc()).computed[perspective]); - - // The size must be enough to contain the largest possible update. - // That might depend on the feature set and generally relies on the - // feature set's update cost calculation to be correct and never allow - // updates with more added/removed features than MaxActiveDimensions. - // In this case, the maximum size of both feature addition and removal - // is 2, since we are incrementally updating one move at a time. 
- typename FeatureSet::IndexList removed, added; - if constexpr (std::is_same_v) - { - const auto* pfBase = &featureTransformer.threatWeights[0]; - auto pfStride = static_cast(TransformedFeatureDimensions); - if constexpr (Forward) - FeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added, - nullptr, false, pfBase, pfStride); - else - FeatureSet::append_changed_indices(perspective, ksq, computed.diff, added, removed, - nullptr, false, pfBase, pfStride); - } - else - { - if constexpr (Forward) - FeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added); - else - FeatureSet::append_changed_indices(perspective, ksq, computed.diff, added, removed); - } - - auto updateContext = - make_accumulator_update_context(perspective, featureTransformer, computed, target_state); - - if constexpr (std::is_same_v) - updateContext.apply(added, removed); - else - { - [[maybe_unused]] const int addedSize = added.ssize(); - [[maybe_unused]] const int removedSize = removed.ssize(); - - assert(addedSize == 1 || addedSize == 2); - assert(removedSize == 1 || removedSize == 2); - assert((Forward && addedSize <= removedSize) || (!Forward && addedSize >= removedSize)); - - // Workaround compiler warning for uninitialized variables, replicated - // on profile builds on windows with gcc 14.2.0. - // Also helps with optimizations on some compilers. 
- - sf_assume(addedSize == 1 || addedSize == 2); - sf_assume(removedSize == 1 || removedSize == 2); - - if (!(removedSize == 1 || removedSize == 2) || !(addedSize == 1 || addedSize == 2)) - sf_unreachable(); - - if ((Forward && removedSize == 1) || (!Forward && addedSize == 1)) - { - assert(addedSize == 1 && removedSize == 1); - updateContext.template apply(added[0], removed[0]); - } - else if (Forward && addedSize == 1) - { - assert(removedSize == 2); - updateContext.template apply(added[0], removed[0], removed[1]); - } - else if (!Forward && removedSize == 1) - { - assert(addedSize == 2); - updateContext.template apply(added[0], added[1], removed[0]); - } - else - { - assert(addedSize == 2 && removedSize == 2); - updateContext.template apply(added[0], added[1], removed[0], - removed[1]); - } - } - - (target_state.template acc()).computed[perspective] = true; -} - -Bitboard get_changed_pieces(const std::array& oldPieces, - const std::array& newPieces) { -#if defined(USE_AVX512) || defined(USE_AVX2) - static_assert(sizeof(Piece) == 1); - Bitboard sameBB = 0; - - for (int i = 0; i < 64; i += 32) - { - const __m256i old_v = _mm256_loadu_si256(reinterpret_cast(&oldPieces[i])); - const __m256i new_v = _mm256_loadu_si256(reinterpret_cast(&newPieces[i])); - const __m256i cmpEqual = _mm256_cmpeq_epi8(old_v, new_v); - const std::uint32_t equalMask = _mm256_movemask_epi8(cmpEqual); - sameBB |= static_cast(equalMask) << i; - } - return ~sameBB; -#elif defined(USE_NEON) - uint8x16x4_t old_v = vld4q_u8(reinterpret_cast(oldPieces.data())); - uint8x16x4_t new_v = vld4q_u8(reinterpret_cast(newPieces.data())); - auto cmp = [=](const int i) { return vceqq_u8(old_v.val[i], new_v.val[i]); }; - - uint8x16_t cmp0_1 = vsriq_n_u8(cmp(1), cmp(0), 1); - uint8x16_t cmp2_3 = vsriq_n_u8(cmp(3), cmp(2), 1); - uint8x16_t merged = vsriq_n_u8(cmp2_3, cmp0_1, 2); - merged = vsriq_n_u8(merged, merged, 4); - uint8x8_t sameBB = vshrn_n_u16(vreinterpretq_u16_u8(merged), 4); - - return 
~vget_lane_u64(vreinterpret_u64_u8(sameBB), 0); -#else - Bitboard changed = 0; - - for (Square sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq) - changed |= static_cast(oldPieces[sq] != newPieces[sq]) << sq; - - return changed; -#endif -} - -template -void update_accumulator_refresh_cache(Color perspective, - const FeatureTransformer& featureTransformer, - const Position& pos, - AccumulatorState& accumulatorState, - AccumulatorCaches::Cache& cache) { - - using Tiling [[maybe_unused]] = SIMDTiling; - - const Square ksq = pos.square(perspective); - auto& entry = cache[ksq][perspective]; - PSQFeatureSet::IndexList removed, added; - - const Bitboard changedBB = get_changed_pieces(entry.pieces, pos.piece_array()); - Bitboard removedBB = changedBB & entry.pieceBB; - Bitboard addedBB = changedBB & pos.pieces(); - - while (removedBB) - { - Square sq = pop_lsb(removedBB); - removed.push_back(PSQFeatureSet::make_index(perspective, sq, entry.pieces[sq], ksq)); - } - while (addedBB) - { - Square sq = pop_lsb(addedBB); - added.push_back(PSQFeatureSet::make_index(perspective, sq, pos.piece_on(sq), ksq)); - } - - entry.pieceBB = pos.pieces(); - entry.pieces = pos.piece_array(); - - auto& accumulator = accumulatorState.acc(); - accumulator.computed[perspective] = true; - -#ifdef VECTOR - vec_t acc[Tiling::NumRegs]; - psqt_vec_t psqt[Tiling::NumPsqtRegs]; - - const auto* weights = &featureTransformer.weights[0]; - - for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j) - { - auto* accTile = - reinterpret_cast(&accumulator.accumulation[perspective][j * Tiling::TileHeight]); - auto* entryTile = reinterpret_cast(&entry.accumulation[j * Tiling::TileHeight]); - - for (IndexType k = 0; k < Tiling::NumRegs; ++k) - acc[k] = entryTile[k]; - - int i = 0; - for (; i < std::min(removed.ssize(), added.ssize()); ++i) - { - size_t indexR = removed[i]; - const size_t offsetR = Dimensions * indexR; - auto* columnR = reinterpret_cast(&weights[offsetR]); - size_t indexA = added[i]; - const size_t 
offsetA = Dimensions * indexA; - auto* columnA = reinterpret_cast(&weights[offsetA]); - - for (IndexType k = 0; k < Tiling::NumRegs; ++k) - acc[k] = fused(acc[k], columnA[k], columnR[k]); - } - for (; i < removed.ssize(); ++i) - { - size_t index = removed[i]; - const size_t offset = Dimensions * index; - auto* column = reinterpret_cast(&weights[offset]); - - for (IndexType k = 0; k < Tiling::NumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); - } - for (; i < added.ssize(); ++i) - { - size_t index = added[i]; - const size_t offset = Dimensions * index; - auto* column = reinterpret_cast(&weights[offset]); - - for (IndexType k = 0; k < Tiling::NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - - for (IndexType k = 0; k < Tiling::NumRegs; k++) - vec_store(&entryTile[k], acc[k]); - for (IndexType k = 0; k < Tiling::NumRegs; k++) - vec_store(&accTile[k], acc[k]); - - weights += Tiling::TileHeight; - } - - for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j) - { - auto* accTilePsqt = reinterpret_cast( - &accumulator.psqtAccumulation[perspective][j * Tiling::PsqtTileHeight]); - auto* entryTilePsqt = - reinterpret_cast(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]); - - for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) - psqt[k] = entryTilePsqt[k]; - - for (int i = 0; i < removed.ssize(); ++i) - { - size_t index = removed[i]; - const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight; - auto* columnPsqt = - reinterpret_cast(&featureTransformer.psqtWeights[offset]); - - for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k) - psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); - } - for (int i = 0; i < added.ssize(); ++i) - { - size_t index = added[i]; - const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight; - auto* columnPsqt = - reinterpret_cast(&featureTransformer.psqtWeights[offset]); - - for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - - 
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) - vec_store_psqt(&entryTilePsqt[k], psqt[k]); - for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqt[k], psqt[k]); - } - -#else - - for (const auto index : removed) - { - const IndexType offset = Dimensions * index; - for (IndexType j = 0; j < Dimensions; ++j) - entry.accumulation[j] -= featureTransformer.weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - entry.psqtAccumulation[k] -= featureTransformer.psqtWeights[index * PSQTBuckets + k]; - } - for (const auto index : added) - { - const IndexType offset = Dimensions * index; - for (IndexType j = 0; j < Dimensions; ++j) - entry.accumulation[j] += featureTransformer.weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - entry.psqtAccumulation[k] += featureTransformer.psqtWeights[index * PSQTBuckets + k]; - } - - // The accumulator of the refresh entry has been updated. - // Now copy its content to the actual accumulator we were refreshing. 
- accumulator.accumulation[perspective] = entry.accumulation; - accumulator.psqtAccumulation[perspective] = entry.psqtAccumulation; -#endif -} - -template -void update_threats_accumulator_full(Color perspective, - const FeatureTransformer& featureTransformer, - const Position& pos, - AccumulatorState& accumulatorState) { - using Tiling [[maybe_unused]] = SIMDTiling; - - ThreatFeatureSet::IndexList active; - ThreatFeatureSet::append_active_indices(perspective, pos, active); - - auto& accumulator = accumulatorState.acc(); - accumulator.computed[perspective] = true; - -#ifdef VECTOR - vec_t acc[Tiling::NumRegs]; - psqt_vec_t psqt[Tiling::NumPsqtRegs]; - - const auto* threatWeights = &featureTransformer.threatWeights[0]; - - for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j) - { - auto* accTile = - reinterpret_cast(&accumulator.accumulation[perspective][j * Tiling::TileHeight]); - - for (IndexType k = 0; k < Tiling::NumRegs; ++k) - acc[k] = vec_zero(); - - int i = 0; - - for (; i < active.ssize(); ++i) - { - size_t index = active[i]; - const size_t offset = Dimensions * index; - auto* column = reinterpret_cast(&threatWeights[offset]); - - #ifdef USE_NEON - for (IndexType k = 0; k < Tiling::NumRegs; k += 2) - { - acc[k] = vec_add_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2]))); - acc[k + 1] = vec_add_16(acc[k + 1], vmovl_high_s8(column[k / 2])); - } - #else - for (IndexType k = 0; k < Tiling::NumRegs; ++k) - acc[k] = vec_add_16(acc[k], vec_convert_8_16(column[k])); - #endif - } - - for (IndexType k = 0; k < Tiling::NumRegs; k++) - vec_store(&accTile[k], acc[k]); - - threatWeights += Tiling::TileHeight; - } - - for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j) - { - auto* accTilePsqt = reinterpret_cast( - &accumulator.psqtAccumulation[perspective][j * Tiling::PsqtTileHeight]); - - for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) - psqt[k] = vec_zero_psqt(); - - for (int i = 0; i < active.ssize(); ++i) - { - size_t index = active[i]; - 
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight; - auto* columnPsqt = - reinterpret_cast(&featureTransformer.threatPsqtWeights[offset]); - - for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - - for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqt[k], psqt[k]); - } - -#else - - for (IndexType j = 0; j < Dimensions; ++j) - accumulator.accumulation[perspective][j] = 0; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[perspective][k] = 0; - - for (const auto index : active) - { - const IndexType offset = Dimensions * index; - - for (IndexType j = 0; j < Dimensions; ++j) - accumulator.accumulation[perspective][j] += - featureTransformer.threatWeights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[perspective][k] += - featureTransformer.threatPsqtWeights[index * PSQTBuckets + k]; - } - -#endif -} - -} - -} diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h deleted file mode 100644 index 438074f430a8673e9d04bfe001b902e9c8072d25..0000000000000000000000000000000000000000 --- a/src/nnue/nnue_accumulator.h +++ /dev/null @@ -1,206 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -// Class for difference calculation of NNUE evaluation function - -#ifndef NNUE_ACCUMULATOR_H_INCLUDED -#define NNUE_ACCUMULATOR_H_INCLUDED - -#include -#include -#include -#include -#include - -#include "../types.h" -#include "nnue_architecture.h" -#include "nnue_common.h" - -namespace Stockfish { -class Position; -} - -namespace Stockfish::Eval::NNUE { - -template -struct alignas(CacheLineSize) Accumulator; - -template -class FeatureTransformer; - -// Class that holds the result of affine transformation of input features -template -struct alignas(CacheLineSize) Accumulator { - std::array, COLOR_NB> accumulation; - std::array, COLOR_NB> psqtAccumulation; - std::array computed = {}; -}; - - -// AccumulatorCaches struct provides per-thread accumulator caches, where each -// cache contains multiple entries for each of the possible king squares. -// When the accumulator needs to be refreshed, the cached entry is used to more -// efficiently update the accumulator, instead of rebuilding it from scratch. -// This idea, was first described by Luecx (author of Koivisto) and -// is commonly referred to as "Finny Tables". 
-struct AccumulatorCaches { - - template - AccumulatorCaches(const Networks& networks) { - clear(networks); - } - - template - struct alignas(CacheLineSize) Cache { - - struct alignas(CacheLineSize) Entry { - std::array accumulation; - std::array psqtAccumulation; - std::array pieces; - Bitboard pieceBB; - - // To initialize a refresh entry, we set all its bitboards empty, - // so we put the biases in the accumulation, without any weights on top - void clear(const std::array& biases) { - accumulation = biases; - std::memset(reinterpret_cast(this) + offsetof(Entry, psqtAccumulation), - 0, sizeof(Entry) - offsetof(Entry, psqtAccumulation)); - } - }; - - template - void clear(const Network& network) { - for (auto& entries1D : entries) - for (auto& entry : entries1D) - entry.clear(network.featureTransformer.biases); - } - - std::array& operator[](Square sq) { return entries[sq]; } - - std::array, SQUARE_NB> entries; - }; - - template - void clear(const Networks& networks) { - big.clear(networks.big); - small.clear(networks.small); - } - - Cache big; - Cache small; -}; - - -template -struct AccumulatorState { - Accumulator accumulatorBig; - Accumulator accumulatorSmall; - typename FeatureSet::DiffType diff; - - template - auto& acc() noexcept { - static_assert(Size == TransformedFeatureDimensionsBig - || Size == TransformedFeatureDimensionsSmall, - "Invalid size for accumulator"); - - if constexpr (Size == TransformedFeatureDimensionsBig) - return accumulatorBig; - else if constexpr (Size == TransformedFeatureDimensionsSmall) - return accumulatorSmall; - } - - template - const auto& acc() const noexcept { - static_assert(Size == TransformedFeatureDimensionsBig - || Size == TransformedFeatureDimensionsSmall, - "Invalid size for accumulator"); - - if constexpr (Size == TransformedFeatureDimensionsBig) - return accumulatorBig; - else if constexpr (Size == TransformedFeatureDimensionsSmall) - return accumulatorSmall; - } - - void reset(const typename FeatureSet::DiffType& 
dp) noexcept { - diff = dp; - accumulatorBig.computed.fill(false); - accumulatorSmall.computed.fill(false); - } - - typename FeatureSet::DiffType& reset() noexcept { - accumulatorBig.computed.fill(false); - accumulatorSmall.computed.fill(false); - return diff; - } -}; - -class AccumulatorStack { - public: - static constexpr std::size_t MaxSize = MAX_PLY + 1; - - template - [[nodiscard]] const AccumulatorState& latest() const noexcept; - - void reset() noexcept; - std::pair push() noexcept; - void pop() noexcept; - - template - void evaluate(const Position& pos, - const FeatureTransformer& featureTransformer, - AccumulatorCaches::Cache& cache) noexcept; - - private: - template - [[nodiscard]] AccumulatorState& mut_latest() noexcept; - - template - [[nodiscard]] const std::array, MaxSize>& accumulators() const noexcept; - - template - [[nodiscard]] std::array, MaxSize>& mut_accumulators() noexcept; - - template - void evaluate_side(Color perspective, - const Position& pos, - const FeatureTransformer& featureTransformer, - AccumulatorCaches::Cache& cache) noexcept; - - template - [[nodiscard]] std::size_t find_last_usable_accumulator(Color perspective) const noexcept; - - template - void forward_update_incremental(Color perspective, - const Position& pos, - const FeatureTransformer& featureTransformer, - const std::size_t begin) noexcept; - - template - void backward_update_incremental(Color perspective, - const Position& pos, - const FeatureTransformer& featureTransformer, - const std::size_t end) noexcept; - - std::array, MaxSize> psq_accumulators; - std::array, MaxSize> threat_accumulators; - std::size_t size = 1; -}; - -} // namespace Stockfish::Eval::NNUE - -#endif // NNUE_ACCUMULATOR_H_INCLUDED diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h deleted file mode 100644 index 389c84bbd87dcc650756750643a2409f4d54490d..0000000000000000000000000000000000000000 --- a/src/nnue/nnue_architecture.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - Stockfish, a 
UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// Input features and network structure used in NNUE evaluation function - -#ifndef NNUE_ARCHITECTURE_H_INCLUDED -#define NNUE_ARCHITECTURE_H_INCLUDED - -#include -#include -#include - -#include "features/half_ka_v2_hm.h" -#include "features/full_threats.h" -#include "layers/affine_transform.h" -#include "layers/affine_transform_sparse_input.h" -#include "layers/clipped_relu.h" -#include "layers/sqr_clipped_relu.h" -#include "nnue_common.h" - -namespace Stockfish::Eval::NNUE { - -// Input features used in evaluation function -using ThreatFeatureSet = Features::FullThreats; -using PSQFeatureSet = Features::HalfKAv2_hm; - -// Number of input feature dimensions after conversion -constexpr IndexType TransformedFeatureDimensionsBig = 1024; -constexpr int L2Big = 31; -constexpr int L3Big = 32; - -constexpr IndexType TransformedFeatureDimensionsSmall = 128; -constexpr int L2Small = 15; -constexpr int L3Small = 32; - -constexpr IndexType PSQTBuckets = 8; -constexpr IndexType LayerStacks = 8; - -// If vector instructions are enabled, we update and refresh the -// accumulator tile by tile such that each tile fits in the CPU's -// vector registers. 
-static_assert(PSQTBuckets % 8 == 0, - "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); - -template -struct NetworkArchitecture { - static constexpr IndexType TransformedFeatureDimensions = L1; - static constexpr int FC_0_OUTPUTS = L2; - static constexpr int FC_1_OUTPUTS = L3; - - Layers::AffineTransformSparseInput fc_0; - Layers::SqrClippedReLU ac_sqr_0; - Layers::ClippedReLU ac_0; - Layers::AffineTransform fc_1; - Layers::ClippedReLU ac_1; - Layers::AffineTransform fc_2; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value() { - // input slice hash - std::uint32_t hashValue = 0xEC42E90Du; - hashValue ^= TransformedFeatureDimensions * 2; - - hashValue = decltype(fc_0)::get_hash_value(hashValue); - hashValue = decltype(ac_0)::get_hash_value(hashValue); - hashValue = decltype(fc_1)::get_hash_value(hashValue); - hashValue = decltype(ac_1)::get_hash_value(hashValue); - hashValue = decltype(fc_2)::get_hash_value(hashValue); - - return hashValue; - } - - // Read network parameters - bool read_parameters(std::istream& stream) { - return fc_0.read_parameters(stream) && ac_0.read_parameters(stream) - && fc_1.read_parameters(stream) && ac_1.read_parameters(stream) - && fc_2.read_parameters(stream); - } - - // Write network parameters - bool write_parameters(std::ostream& stream) const { - return fc_0.write_parameters(stream) && ac_0.write_parameters(stream) - && fc_1.write_parameters(stream) && ac_1.write_parameters(stream) - && fc_2.write_parameters(stream); - } - - std::int32_t propagate(const TransformedFeatureType* transformedFeatures) const { - struct alignas(CacheLineSize) Buffer { - alignas(CacheLineSize) typename decltype(fc_0)::OutputBuffer fc_0_out; - alignas(CacheLineSize) typename decltype(ac_sqr_0)::OutputType - ac_sqr_0_out[ceil_to_multiple(FC_0_OUTPUTS * 2, 32)]; - alignas(CacheLineSize) typename decltype(ac_0)::OutputBuffer ac_0_out; - alignas(CacheLineSize) typename 
decltype(fc_1)::OutputBuffer fc_1_out; - alignas(CacheLineSize) typename decltype(ac_1)::OutputBuffer ac_1_out; - alignas(CacheLineSize) typename decltype(fc_2)::OutputBuffer fc_2_out; - - Buffer() { std::memset(this, 0, sizeof(*this)); } - }; - -#if defined(__clang__) && (__APPLE__) - // workaround for a bug reported with xcode 12 - static thread_local auto tlsBuffer = std::make_unique(); - // Access TLS only once, cache result. - Buffer& buffer = *tlsBuffer; -#else - alignas(CacheLineSize) static thread_local Buffer buffer; -#endif - - fc_0.propagate(transformedFeatures, buffer.fc_0_out); - ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out); - ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out); - std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out, - FC_0_OUTPUTS * sizeof(typename decltype(ac_0)::OutputType)); - fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out); - ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out); - fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out); - - // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1< -struct std::hash> { - std::size_t - operator()(const Stockfish::Eval::NNUE::NetworkArchitecture& arch) const noexcept { - return arch.get_content_hash(); - } -}; - -#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h deleted file mode 100644 index bd546d277532904fc51ab3256a6ca61b29a69afb..0000000000000000000000000000000000000000 --- a/src/nnue/nnue_common.h +++ /dev/null @@ -1,293 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// Constants used in NNUE evaluation function - -#ifndef NNUE_COMMON_H_INCLUDED -#define NNUE_COMMON_H_INCLUDED - -#include -#include -#include -#include -#include -#include - -#include "../misc.h" - -#if defined(USE_AVX2) - #include - -#elif defined(USE_SSE41) - #include - -#elif defined(USE_SSSE3) - #include - -#elif defined(USE_SSE2) - #include - -#elif defined(USE_NEON) - #include -#endif - -namespace Stockfish::Eval::NNUE { - -using BiasType = std::int16_t; -using ThreatWeightType = std::int8_t; -using WeightType = std::int16_t; -using PSQTWeightType = std::int32_t; -using IndexType = std::uint32_t; - -// Version of the evaluation file -constexpr std::uint32_t Version = 0x7AF32F20u; - -// Constant used in evaluation value calculation -constexpr int OutputScale = 16; -constexpr int WeightScaleBits = 6; - -// Size of cache line (in bytes) -constexpr std::size_t CacheLineSize = 64; - -constexpr const char Leb128MagicString[] = "COMPRESSED_LEB128"; -constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1; - -// SIMD width (in bytes) -#if defined(USE_AVX2) -constexpr std::size_t SimdWidth = 32; - -#elif defined(USE_SSE2) -constexpr std::size_t SimdWidth = 16; - -#elif defined(USE_NEON) -constexpr std::size_t SimdWidth = 16; -#endif - -constexpr std::size_t MaxSimdWidth = 32; - -// Type of input feature after conversion -using TransformedFeatureType = std::uint8_t; - -// Round n up to be a multiple of base -template -constexpr IntType ceil_to_multiple(IntType n, IntType base) { - return (n + base - 1) / base * base; -} - - -// Utility to read an integer (signed or unsigned, any size) -// 
from a stream in little-endian order. We swap the byte order after the read if -// necessary to return a result with the byte ordering of the compiling machine. -template -inline IntType read_little_endian(std::istream& stream) { - IntType result; - - if (IsLittleEndian) - stream.read(reinterpret_cast(&result), sizeof(IntType)); - else - { - std::uint8_t u[sizeof(IntType)]; - std::make_unsigned_t v = 0; - - stream.read(reinterpret_cast(u), sizeof(IntType)); - for (std::size_t i = 0; i < sizeof(IntType); ++i) - v = (v << 8) | u[sizeof(IntType) - i - 1]; - - std::memcpy(&result, &v, sizeof(IntType)); - } - - return result; -} - - -// Utility to write an integer (signed or unsigned, any size) -// to a stream in little-endian order. We swap the byte order before the write if -// necessary to always write in little-endian order, independently of the byte -// ordering of the compiling machine. -template -inline void write_little_endian(std::ostream& stream, IntType value) { - - if (IsLittleEndian) - stream.write(reinterpret_cast(&value), sizeof(IntType)); - else - { - std::uint8_t u[sizeof(IntType)]; - std::make_unsigned_t v = value; - - std::size_t i = 0; - // if constexpr to silence the warning about shift by 8 - if constexpr (sizeof(IntType) > 1) - { - for (; i + 1 < sizeof(IntType); ++i) - { - u[i] = std::uint8_t(v); - v >>= 8; - } - } - u[i] = std::uint8_t(v); - - stream.write(reinterpret_cast(u), sizeof(IntType)); - } -} - - -// Read integers in bulk from a little-endian stream. -// This reads N integers from stream s and puts them in array out. -template -inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) { - if (IsLittleEndian) - stream.read(reinterpret_cast(out), sizeof(IntType) * count); - else - for (std::size_t i = 0; i < count; ++i) - out[i] = read_little_endian(stream); -} - - -// Write integers in bulk to a little-endian stream. -// This takes N integers from array values and writes them on stream s. 
-template -inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) { - if (IsLittleEndian) - stream.write(reinterpret_cast(values), sizeof(IntType) * count); - else - for (std::size_t i = 0; i < count; ++i) - write_little_endian(stream, values[i]); -} - -// Read N signed integers from the stream s, putting them in the array out. -// The stream is assumed to be compressed using the signed LEB128 format. -// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. -template -inline void read_leb_128_detail(std::istream& stream, - std::array& out, - std::uint32_t& bytes_left, - BufType& buf, - std::uint32_t& buf_pos) { - - static_assert(std::is_signed_v, "Not implemented for unsigned types"); - static_assert(sizeof(IntType) <= 4, "Not implemented for types larger than 32 bit"); - - IntType result = 0; - size_t shift = 0, i = 0; - while (i < Count) - { - if (buf_pos == buf.size()) - { - stream.read(reinterpret_cast(buf.data()), - std::min(std::size_t(bytes_left), buf.size())); - buf_pos = 0; - } - - std::uint8_t byte = buf[buf_pos++]; - --bytes_left; - result |= (byte & 0x7f) << (shift % 32); - shift += 7; - - if ((byte & 0x80) == 0) - { - out[i++] = (shift >= 32 || (byte & 0x40) == 0) ? result : result | ~((1 << shift) - 1); - result = 0; - shift = 0; - } - } -} - -template -inline void read_leb_128(std::istream& stream, Arrays&... outs) { - // Check the presence of our LEB128 magic string - char leb128MagicString[Leb128MagicStringSize]; - stream.read(leb128MagicString, Leb128MagicStringSize); - assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0); - - auto bytes_left = read_little_endian(stream); - std::array buf; - std::uint32_t buf_pos = std::uint32_t(buf.size()); - - (read_leb_128_detail(stream, outs, bytes_left, buf, buf_pos), ...); - - assert(bytes_left == 0); -} - - -// Write signed integers to a stream with LEB128 compression. 
-// This takes N integers from array values, compresses them with -// the LEB128 algorithm and writes the result on the stream s. -// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. -template -inline void write_leb_128(std::ostream& stream, const std::array& values) { - - // Write our LEB128 magic string - stream.write(Leb128MagicString, Leb128MagicStringSize); - - static_assert(std::is_signed_v, "Not implemented for unsigned types"); - - std::uint32_t byte_count = 0; - for (std::size_t i = 0; i < Count; ++i) - { - IntType value = values[i]; - std::uint8_t byte; - do - { - byte = value & 0x7f; - value >>= 7; - ++byte_count; - } while ((byte & 0x40) == 0 ? value != 0 : value != -1); - } - - write_little_endian(stream, byte_count); - - const std::uint32_t BUF_SIZE = 4096; - std::uint8_t buf[BUF_SIZE]; - std::uint32_t buf_pos = 0; - - auto flush = [&]() { - if (buf_pos > 0) - { - stream.write(reinterpret_cast(buf), buf_pos); - buf_pos = 0; - } - }; - - auto write = [&](std::uint8_t b) { - buf[buf_pos++] = b; - if (buf_pos == BUF_SIZE) - flush(); - }; - - for (std::size_t i = 0; i < Count; ++i) - { - IntType value = values[i]; - while (true) - { - std::uint8_t byte = value & 0x7f; - value >>= 7; - if ((byte & 0x40) == 0 ? 
value == 0 : value == -1) - { - write(byte); - break; - } - write(byte | 0x80); - } - } - - flush(); -} - -} // namespace Stockfish::Eval::NNUE - -#endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h deleted file mode 100644 index 798c0fa11a57c1104c039db0364f6705638eec0f..0000000000000000000000000000000000000000 --- a/src/nnue/nnue_feature_transformer.h +++ /dev/null @@ -1,438 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -// A class that converts the input features of the NNUE evaluation function - -#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED -#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED - -#include -#include -#include -#include -#include - -#include "../position.h" -#include "../types.h" -#include "nnue_accumulator.h" -#include "nnue_architecture.h" -#include "nnue_common.h" -#include "simd.h" - -namespace Stockfish::Eval::NNUE { - -// Returns the inverse of a permutation -template -constexpr std::array -invert_permutation(const std::array& order) { - std::array inverse{}; - for (std::size_t i = 0; i < order.size(); i++) - inverse[order[i]] = i; - return inverse; -} - -// Divide a byte region of size TotalSize to chunks of size -// BlockSize, and permute the blocks by a given order -template -void permute(std::array& data, const std::array& order) { - constexpr std::size_t TotalSize = N * sizeof(T); - - static_assert(TotalSize % (BlockSize * OrderSize) == 0, - "ChunkSize * OrderSize must perfectly divide TotalSize"); - - constexpr std::size_t ProcessChunkSize = BlockSize * OrderSize; - - std::array buffer{}; - - std::byte* const bytes = reinterpret_cast(data.data()); - - for (std::size_t i = 0; i < TotalSize; i += ProcessChunkSize) - { - std::byte* const values = &bytes[i]; - - for (std::size_t j = 0; j < OrderSize; j++) - { - auto* const buffer_chunk = &buffer[j * BlockSize]; - auto* const value_chunk = &values[order[j] * BlockSize]; - - std::copy(value_chunk, value_chunk + BlockSize, buffer_chunk); - } - - std::copy(std::begin(buffer), std::end(buffer), values); - } -} - -// Input feature converter -template -class FeatureTransformer { - static constexpr bool UseThreats = - (TransformedFeatureDimensions == TransformedFeatureDimensionsBig); - // Number of output dimensions for one side - static constexpr IndexType HalfDimensions = TransformedFeatureDimensions; - - public: - // Output type - using OutputType = TransformedFeatureType; - - // Number of input/output dimensions - 
static constexpr IndexType InputDimensions = PSQFeatureSet::Dimensions; - static constexpr IndexType ThreatInputDimensions = ThreatFeatureSet::Dimensions; - static constexpr IndexType TotalInputDimensions = - InputDimensions + (UseThreats ? ThreatInputDimensions : 0); - static constexpr IndexType OutputDimensions = HalfDimensions; - - // Size of forward propagation buffer - static constexpr std::size_t BufferSize = OutputDimensions * sizeof(OutputType); - - // Store the order by which 128-bit blocks of a 1024-bit data must - // be permuted so that calling packus on adjacent vectors of 16-bit - // integers loaded from the data results in the pre-permutation order - static constexpr auto PackusEpi16Order = []() -> std::array { -#if defined(USE_AVX512) - // _mm512_packus_epi16 after permutation: - // | 0 | 2 | 4 | 6 | // Vector 0 - // | 1 | 3 | 5 | 7 | // Vector 1 - // | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | // Packed Result - return {0, 2, 4, 6, 1, 3, 5, 7}; -#elif defined(USE_AVX2) - // _mm256_packus_epi16 after permutation: - // | 0 | 2 | | 4 | 6 | // Vector 0, 2 - // | 1 | 3 | | 5 | 7 | // Vector 1, 3 - // | 0 | 1 | 2 | 3 | | 4 | 5 | 6 | 7 | // Packed Result - return {0, 2, 1, 3, 4, 6, 5, 7}; -#else - return {0, 1, 2, 3, 4, 5, 6, 7}; -#endif - }(); - - static constexpr auto InversePackusEpi16Order = invert_permutation(PackusEpi16Order); - - static constexpr std::uint32_t combine_hash(std::initializer_list hashes) { - std::uint32_t hash = 0; - for (const auto component_hash : hashes) - { - hash = (hash << 1) | (hash >> 31); - hash ^= component_hash; - } - return hash; - } - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value() { - return (UseThreats ? 
combine_hash({ThreatFeatureSet::HashValue, PSQFeatureSet::HashValue}) - : PSQFeatureSet::HashValue) - ^ (OutputDimensions * 2); - } - - void permute_weights() { - permute<16>(biases, PackusEpi16Order); - permute<16>(weights, PackusEpi16Order); - - if constexpr (UseThreats) - permute<8>(threatWeights, PackusEpi16Order); - } - - void unpermute_weights() { - permute<16>(biases, InversePackusEpi16Order); - permute<16>(weights, InversePackusEpi16Order); - - if constexpr (UseThreats) - permute<8>(threatWeights, InversePackusEpi16Order); - } - - // Read network parameters - bool read_parameters(std::istream& stream) { - read_leb_128(stream, biases); - - if constexpr (UseThreats) - { - read_little_endian(stream, threatWeights.data(), - ThreatInputDimensions * HalfDimensions); - read_leb_128(stream, weights); - - read_leb_128(stream, threatPsqtWeights, psqtWeights); - } - else - { - read_leb_128(stream, weights); - read_leb_128(stream, psqtWeights); - } - - permute_weights(); - - return !stream.fail(); - } - - // Write network parameters - bool write_parameters(std::ostream& stream) const { - std::unique_ptr copy = std::make_unique(*this); - - copy->unpermute_weights(); - - write_leb_128(stream, copy->biases); - - if constexpr (UseThreats) - { - write_little_endian(stream, copy->threatWeights.data(), - ThreatInputDimensions * HalfDimensions); - write_leb_128(stream, copy->weights); - - auto combinedPsqtWeights = - std::make_unique>(); - - std::copy(std::begin(copy->threatPsqtWeights), - std::begin(copy->threatPsqtWeights) + ThreatInputDimensions * PSQTBuckets, - combinedPsqtWeights->begin()); - - std::copy(std::begin(copy->psqtWeights), - std::begin(copy->psqtWeights) + InputDimensions * PSQTBuckets, - combinedPsqtWeights->begin() + ThreatInputDimensions * PSQTBuckets); - - write_leb_128(stream, *combinedPsqtWeights); - } - else - { - write_leb_128(stream, copy->weights); - write_leb_128(stream, copy->psqtWeights); - } - - return !stream.fail(); - } - - std::size_t 
get_content_hash() const { - std::size_t h = 0; - - hash_combine(h, get_raw_data_hash(biases)); - hash_combine(h, get_raw_data_hash(weights)); - hash_combine(h, get_raw_data_hash(psqtWeights)); - - if constexpr (UseThreats) - { - hash_combine(h, get_raw_data_hash(threatWeights)); - hash_combine(h, get_raw_data_hash(threatPsqtWeights)); - } - - hash_combine(h, get_hash_value()); - - return h; - } - - // Convert input features - std::int32_t transform(const Position& pos, - AccumulatorStack& accumulatorStack, - AccumulatorCaches::Cache& cache, - OutputType* output, - int bucket) const { - - using namespace SIMD; - accumulatorStack.evaluate(pos, *this, cache); - const auto& accumulatorState = accumulatorStack.latest(); - const auto& threatAccumulatorState = accumulatorStack.latest(); - - const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; - const auto& psqtAccumulation = (accumulatorState.acc()).psqtAccumulation; - auto psqt = - (psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket]); - - if constexpr (UseThreats) - { - const auto& threatPsqtAccumulation = - (threatAccumulatorState.acc()).psqtAccumulation; - psqt = (psqt + threatPsqtAccumulation[perspectives[0]][bucket] - - threatPsqtAccumulation[perspectives[1]][bucket]) - / 2; - } - else - psqt /= 2; - - const auto& accumulation = (accumulatorState.acc()).accumulation; - const auto& threatAccumulation = - (threatAccumulatorState.acc()).accumulation; - - for (IndexType p = 0; p < 2; ++p) - { - const IndexType offset = (HalfDimensions / 2) * p; - -#if defined(VECTOR) - - constexpr IndexType OutputChunkSize = MaxChunkSize; - static_assert((HalfDimensions / 2) % OutputChunkSize == 0); - constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; - - const vec_t Zero = vec_zero(); - const vec_t One = vec_set_16(255); - - const vec_t* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); - const vec_t* in1 = - 
reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); - vec_t* out = reinterpret_cast(output + offset); - - // Per the NNUE architecture, here we want to multiply pairs of - // clipped elements and divide the product by 128. To do this, - // we can naively perform min/max operation to clip each of the - // four int16 vectors, mullo pairs together, then pack them into - // one int8 vector. However, there exists a faster way. - - // The idea here is to use the implicit clipping from packus to - // save us two vec_max_16 instructions. This clipping works due - // to the fact that any int16 integer below zero will be zeroed - // on packus. - - // Consider the case where the second element is negative. - // If we do standard clipping, that element will be zero, which - // means our pairwise product is zero. If we perform packus and - // remove the lower-side clip for the second element, then our - // product before packus will be negative, and is zeroed on pack. - // The two operation produce equivalent results, but the second - // one (using packus) saves one max operation per pair. - - // But here we run into a problem: mullo does not preserve the - // sign of the multiplication. We can get around this by doing - // mulhi, which keeps the sign. But that requires an additional - // tweak. - - // mulhi cuts off the last 16 bits of the resulting product, - // which is the same as performing a rightward shift of 16 bits. - // We can use this to our advantage. Recall that we want to - // divide the final product by 128, which is equivalent to a - // 7-bit right shift. Intuitively, if we shift the clipped - // value left by 9, and perform mulhi, which shifts the product - // right by 16 bits, then we will net a right shift of 7 bits. - // However, this won't work as intended. 
Since we clip the - // values to have a maximum value of 127, shifting it by 9 bits - // might occupy the signed bit, resulting in some positive - // values being interpreted as negative after the shift. - - // There is a way, however, to get around this limitation. When - // loading the network, scale accumulator weights and biases by - // 2. To get the same pairwise multiplication result as before, - // we need to divide the product by 128 * 2 * 2 = 512, which - // amounts to a right shift of 9 bits. So now we only have to - // shift left by 7 bits, perform mulhi (shifts right by 16 bits) - // and net a 9 bit right shift. Since we scaled everything by - // two, the values are clipped at 127 * 2 = 254, which occupies - // 8 bits. Shifting it by 7 bits left will no longer occupy the - // signed bit, so we are safe. - - // Note that on NEON processors, we shift left by 6 instead - // because the instruction "vqdmulhq_s16" also doubles the - // return value after the multiplication, adding an extra shift - // to the left by 1, so we compensate by shifting less before - // the multiplication. 
- - constexpr int shift = - #if defined(USE_SSE2) - 7; - #else - 6; - #endif - if constexpr (UseThreats) - { - const vec_t* tin0 = - reinterpret_cast(&(threatAccumulation[perspectives[p]][0])); - const vec_t* tin1 = reinterpret_cast( - &(threatAccumulation[perspectives[p]][HalfDimensions / 2])); - for (IndexType j = 0; j < NumOutputChunks; ++j) - { - const vec_t acc0a = vec_add_16(in0[j * 2 + 0], tin0[j * 2 + 0]); - const vec_t acc0b = vec_add_16(in0[j * 2 + 1], tin0[j * 2 + 1]); - const vec_t acc1a = vec_add_16(in1[j * 2 + 0], tin1[j * 2 + 0]); - const vec_t acc1b = vec_add_16(in1[j * 2 + 1], tin1[j * 2 + 1]); - - const vec_t sum0a = - vec_slli_16(vec_max_16(vec_min_16(acc0a, One), Zero), shift); - const vec_t sum0b = - vec_slli_16(vec_max_16(vec_min_16(acc0b, One), Zero), shift); - const vec_t sum1a = vec_min_16(acc1a, One); - const vec_t sum1b = vec_min_16(acc1b, One); - - const vec_t pa = vec_mulhi_16(sum0a, sum1a); - const vec_t pb = vec_mulhi_16(sum0b, sum1b); - - out[j] = vec_packus_16(pa, pb); - } - } - else - { - for (IndexType j = 0; j < NumOutputChunks; ++j) - { - const vec_t sum0a = - vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero), shift); - const vec_t sum0b = - vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero), shift); - const vec_t sum1a = vec_min_16(in1[j * 2 + 0], One); - const vec_t sum1b = vec_min_16(in1[j * 2 + 1], One); - - const vec_t pa = vec_mulhi_16(sum0a, sum1a); - const vec_t pb = vec_mulhi_16(sum0b, sum1b); - - out[j] = vec_packus_16(pa, pb); - } - } - -#else - - for (IndexType j = 0; j < HalfDimensions / 2; ++j) - { - BiasType sum0 = accumulation[static_cast(perspectives[p])][j + 0]; - BiasType sum1 = - accumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; - - if constexpr (UseThreats) - { - sum0 += threatAccumulation[static_cast(perspectives[p])][j + 0]; - sum1 += - threatAccumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; - } - - sum0 = std::clamp(sum0, 0, 255); - sum1 = 
std::clamp(sum1, 0, 255); - - output[offset + j] = static_cast(unsigned(sum0 * sum1) / 512); - } - -#endif - } - - return psqt; - } // end of function transform() - - alignas(CacheLineSize) std::array biases; - alignas(CacheLineSize) std::array weights; - alignas(CacheLineSize) - std::array threatWeights; - alignas(CacheLineSize) std::array psqtWeights; - alignas(CacheLineSize) - std::array threatPsqtWeights; -}; - -} // namespace Stockfish::Eval::NNUE - - -template -struct std::hash> { - std::size_t - operator()(const Stockfish::Eval::NNUE::FeatureTransformer& ft) - const noexcept { - return ft.get_content_hash(); - } -}; - -#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp deleted file mode 100644 index 66a6764a33de778ddc839bb259aa4212d6399f0e..0000000000000000000000000000000000000000 --- a/src/nnue/nnue_misc.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -// Code for calculating NNUE evaluation function - -#include "nnue_misc.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../position.h" -#include "../types.h" -#include "../uci.h" -#include "network.h" -#include "nnue_accumulator.h" - -namespace Stockfish::Eval::NNUE { - - -constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); - - -namespace { -// Converts a Value into (centi)pawns and writes it in a buffer. -// The buffer must have capacity for at least 5 chars. -void format_cp_compact(Value v, char* buffer, const Position& pos) { - - buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); - - int cp = std::abs(UCIEngine::to_cp(v, pos)); - if (cp >= 10000) - { - buffer[1] = '0' + cp / 10000; - cp %= 10000; - buffer[2] = '0' + cp / 1000; - cp %= 1000; - buffer[3] = '0' + cp / 100; - buffer[4] = ' '; - } - else if (cp >= 1000) - { - buffer[1] = '0' + cp / 1000; - cp %= 1000; - buffer[2] = '0' + cp / 100; - cp %= 100; - buffer[3] = '.'; - buffer[4] = '0' + cp / 10; - } - else - { - buffer[1] = '0' + cp / 100; - cp %= 100; - buffer[2] = '.'; - buffer[3] = '0' + cp / 10; - cp %= 10; - buffer[4] = '0' + cp / 1; - } -} - - -// Converts a Value into pawns, always keeping two decimals -void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& pos) { - - const double pawns = std::abs(0.01 * UCIEngine::to_cp(v, pos)); - - stream << (v < 0 ? '-' - : v > 0 ? '+' - : ' ') - << std::setiosflags(std::ios::fixed) << std::setw(6) << std::setprecision(2) << pawns; -} -} - - -// Returns a string with the value of each piece on a board, -// and a table for (PSQT, Layers) values bucket by bucket. 
-std::string -trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::AccumulatorCaches& caches) { - - std::stringstream ss; - - char board[3 * 8 + 1][8 * 8 + 2]; - std::memset(board, ' ', sizeof(board)); - for (int row = 0; row < 3 * 8 + 1; ++row) - board[row][8 * 8 + 1] = '\0'; - - // A lambda to output one box of the board - auto writeSquare = [&board, &pos](File file, Rank rank, Piece pc, Value value) { - const int x = int(file) * 8; - const int y = (7 - int(rank)) * 3; - for (int i = 1; i < 8; ++i) - board[y][x + i] = board[y + 3][x + i] = '-'; - for (int i = 1; i < 3; ++i) - board[y + i][x] = board[y + i][x + 8] = '|'; - board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+'; - if (pc != NO_PIECE) - board[y + 1][x + 4] = PieceToChar[pc]; - if (is_valid(value)) - format_cp_compact(value, &board[y + 2][x + 2], pos); - }; - - auto accumulators = std::make_unique(); - - // We estimate the value of each piece by doing a differential evaluation from - // the current base eval, simulating the removal of the piece from its square. - auto [psqt, positional] = networks.big.evaluate(pos, *accumulators, caches.big); - Value base = psqt + positional; - base = pos.side_to_move() == WHITE ? base : -base; - - for (File f = FILE_A; f <= FILE_H; ++f) - for (Rank r = RANK_1; r <= RANK_8; ++r) - { - Square sq = make_square(f, r); - Piece pc = pos.piece_on(sq); - Value v = VALUE_NONE; - - if (pc != NO_PIECE && type_of(pc) != KING) - { - pos.remove_piece(sq); - - accumulators->reset(); - std::tie(psqt, positional) = networks.big.evaluate(pos, *accumulators, caches.big); - Value eval = psqt + positional; - eval = pos.side_to_move() == WHITE ? 
eval : -eval; - v = base - eval; - - pos.put_piece(pc, sq); - } - - writeSquare(f, r, pc, v); - } - - ss << " NNUE derived piece values:\n"; - for (int row = 0; row < 3 * 8 + 1; ++row) - ss << board[row] << '\n'; - ss << '\n'; - - accumulators->reset(); - auto t = networks.big.trace_evaluate(pos, *accumulators, caches.big); - - ss << " NNUE network contributions " - << (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl - << "+------------+------------+------------+------------+\n" - << "| Bucket | Material | Positional | Total |\n" - << "| | (PSQT) | (Layers) | |\n" - << "+------------+------------+------------+------------+\n"; - - for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) - { - ss << "| " << bucket << " " // - << " | "; - format_cp_aligned_dot(t.psqt[bucket], ss, pos); - ss << " " // - << " | "; - format_cp_aligned_dot(t.positional[bucket], ss, pos); - ss << " " // - << " | "; - format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos); - ss << " " // - << " |"; - if (bucket == t.correctBucket) - ss << " <-- this bucket is used"; - ss << '\n'; - } - - ss << "+------------+------------+------------+------------+\n"; - - return ss.str(); -} - - -} // namespace Stockfish::Eval::NNUE diff --git a/src/nnue/nnue_misc.h b/src/nnue/nnue_misc.h deleted file mode 100644 index ecece5589c27f83800053bbbfca118c233d4f4b5..0000000000000000000000000000000000000000 --- a/src/nnue/nnue_misc.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef NNUE_MISC_H_INCLUDED -#define NNUE_MISC_H_INCLUDED - -#include -#include -#include - -#include "../misc.h" -#include "../types.h" -#include "nnue_architecture.h" - -namespace Stockfish { - -class Position; - -namespace Eval::NNUE { - -// EvalFile uses fixed string types because it's part of the network structure which must be trivial. -struct EvalFile { - // Default net name, will use one of the EvalFileDefaultName* macros defined - // in evaluate.h - FixedString<256> defaultName; - // Selected net name, either via uci option or default - FixedString<256> current; - // Net description extracted from the net file - FixedString<256> netDescription; -}; - -struct NnueEvalTrace { - static_assert(LayerStacks == PSQTBuckets); - - Value psqt[LayerStacks]; - Value positional[LayerStacks]; - std::size_t correctBucket; -}; - -struct Networks; -struct AccumulatorCaches; - -std::string trace(Position& pos, const Networks& networks, AccumulatorCaches& caches); - -} // namespace Stockfish::Eval::NNUE -} // namespace Stockfish - -template<> -struct std::hash { - std::size_t operator()(const Stockfish::Eval::NNUE::EvalFile& evalFile) const noexcept { - std::size_t h = 0; - Stockfish::hash_combine(h, evalFile.defaultName); - Stockfish::hash_combine(h, evalFile.current); - Stockfish::hash_combine(h, evalFile.netDescription); - return h; - } -}; - -#endif // #ifndef NNUE_MISC_H_INCLUDED diff --git a/src/nnue/simd.h b/src/nnue/simd.h deleted file mode 100644 index 601792c1cd0d5476900d3491d3d1c243a4e2fd04..0000000000000000000000000000000000000000 --- a/src/nnue/simd.h +++ /dev/null @@ -1,440 +0,0 @@ -/* - Stockfish, a UCI 
chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef NNUE_SIMD_H_INCLUDED -#define NNUE_SIMD_H_INCLUDED - -#if defined(USE_AVX2) - #include - -#elif defined(USE_SSE41) - #include - -#elif defined(USE_SSSE3) - #include - -#elif defined(USE_SSE2) - #include - -#elif defined(USE_NEON) - #include -#endif - -#include "../types.h" -#include "nnue_common.h" - -namespace Stockfish::Eval::NNUE::SIMD { - -// If vector instructions are enabled, we update and refresh the -// accumulator tile by tile such that each tile fits in the CPU's -// vector registers. 
-#define VECTOR - -#ifdef USE_AVX512 -using vec_t = __m512i; -using vec_i8_t = __m256i; -using vec128_t = __m128i; -using psqt_vec_t = __m256i; -using vec_uint_t = __m512i; - #define vec_load(a) _mm512_load_si512(a) - #define vec_store(a, b) _mm512_store_si512(a, b) - #define vec_convert_8_16(a) _mm512_cvtepi8_epi16(a) - #define vec_add_16(a, b) _mm512_add_epi16(a, b) - #define vec_sub_16(a, b) _mm512_sub_epi16(a, b) - #define vec_mulhi_16(a, b) _mm512_mulhi_epi16(a, b) - #define vec_zero() _mm512_setzero_epi32() - #define vec_set_16(a) _mm512_set1_epi16(a) - #define vec_max_16(a, b) _mm512_max_epi16(a, b) - #define vec_min_16(a, b) _mm512_min_epi16(a, b) - #define vec_slli_16(a, b) _mm512_slli_epi16(a, b) - // Inverse permuted at load time - #define vec_packus_16(a, b) _mm512_packus_epi16(a, b) - #define vec_load_psqt(a) _mm256_load_si256(a) - #define vec_store_psqt(a, b) _mm256_store_si256(a, b) - #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) - #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b) - #define vec_zero_psqt() _mm256_setzero_si256() - - #ifdef USE_SSSE3 - #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) - #endif - - #define vec128_zero _mm_setzero_si128() - #define vec128_set_16(a) _mm_set1_epi16(a) - #define vec128_load(a) _mm_load_si128(a) - #define vec128_storeu(a, b) _mm_storeu_si128(a, b) - #define vec128_add(a, b) _mm_add_epi16(a, b) - #define NumRegistersSIMD 16 - #define MaxChunkSize 64 - -#elif USE_AVX2 -using vec_t = __m256i; -using vec_i8_t = __m128i; -using vec128_t = __m128i; -using psqt_vec_t = __m256i; -using vec_uint_t = __m256i; - #define vec_load(a) _mm256_load_si256(a) - #define vec_store(a, b) _mm256_store_si256(a, b) - #define vec_convert_8_16(a) _mm256_cvtepi8_epi16(a) - #define vec_add_16(a, b) _mm256_add_epi16(a, b) - #define vec_sub_16(a, b) _mm256_sub_epi16(a, b) - #define vec_mulhi_16(a, b) _mm256_mulhi_epi16(a, b) - #define vec_zero() _mm256_setzero_si256() - #define vec_set_16(a) 
_mm256_set1_epi16(a) - #define vec_max_16(a, b) _mm256_max_epi16(a, b) - #define vec_min_16(a, b) _mm256_min_epi16(a, b) - #define vec_slli_16(a, b) _mm256_slli_epi16(a, b) - // Inverse permuted at load time - #define vec_packus_16(a, b) _mm256_packus_epi16(a, b) - #define vec_load_psqt(a) _mm256_load_si256(a) - #define vec_store_psqt(a, b) _mm256_store_si256(a, b) - #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) - #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b) - #define vec_zero_psqt() _mm256_setzero_si256() - - #ifdef USE_SSSE3 - #if defined(USE_VNNI) && !defined(USE_AVXVNNI) - #define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256()) - #else - #define vec_nnz(a) \ - _mm256_movemask_ps( \ - _mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) - #endif - #endif - - #define vec128_zero _mm_setzero_si128() - #define vec128_set_16(a) _mm_set1_epi16(a) - #define vec128_load(a) _mm_load_si128(a) - #define vec128_storeu(a, b) _mm_storeu_si128(a, b) - #define vec128_add(a, b) _mm_add_epi16(a, b) - - #define NumRegistersSIMD 12 - #define MaxChunkSize 32 - -#elif USE_SSE2 -using vec_t = __m128i; -using vec_i8_t = std::uint64_t; // for the correct size -- will be loaded into an xmm reg -using vec128_t = __m128i; -using psqt_vec_t = __m128i; -using vec_uint_t = __m128i; - #define vec_load(a) (*(a)) - #define vec_store(a, b) *(a) = (b) - #define vec_add_16(a, b) _mm_add_epi16(a, b) - #define vec_sub_16(a, b) _mm_sub_epi16(a, b) - #define vec_mulhi_16(a, b) _mm_mulhi_epi16(a, b) - #define vec_zero() _mm_setzero_si128() - #define vec_set_16(a) _mm_set1_epi16(a) - #define vec_max_16(a, b) _mm_max_epi16(a, b) - #define vec_min_16(a, b) _mm_min_epi16(a, b) - #define vec_slli_16(a, b) _mm_slli_epi16(a, b) - #define vec_packus_16(a, b) _mm_packus_epi16(a, b) - #define vec_load_psqt(a) (*(a)) - #define vec_store_psqt(a, b) *(a) = (b) - #define vec_add_psqt_32(a, b) _mm_add_epi32(a, b) - #define vec_sub_psqt_32(a, b) _mm_sub_epi32(a, b) - #define 
vec_zero_psqt() _mm_setzero_si128() - - #ifdef USE_SSSE3 - #define vec_nnz(a) \ - _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) - #endif - - #ifdef __i386__ -inline __m128i _mm_cvtsi64_si128(int64_t val) { - return _mm_loadl_epi64(reinterpret_cast(&val)); -} - #endif - - #ifdef USE_SSE41 - #define vec_convert_8_16(a) _mm_cvtepi8_epi16(_mm_cvtsi64_si128(static_cast(a))) - #else -// Credit: Yoshie2000 -inline __m128i vec_convert_8_16(uint64_t x) { - __m128i v8 = _mm_cvtsi64_si128(static_cast(x)); - __m128i sign = _mm_cmpgt_epi8(_mm_setzero_si128(), v8); - return _mm_unpacklo_epi8(v8, sign); -} - #endif - - #define vec128_zero _mm_setzero_si128() - #define vec128_set_16(a) _mm_set1_epi16(a) - #define vec128_load(a) _mm_load_si128(a) - #define vec128_storeu(a, b) _mm_storeu_si128(a, b) - #define vec128_add(a, b) _mm_add_epi16(a, b) - - #define NumRegistersSIMD (Is64Bit ? 12 : 6) - #define MaxChunkSize 16 - -#elif USE_NEON -using vec_i8x8_t __attribute__((may_alias)) = int8x8_t; -using vec_i16x8_t __attribute__((may_alias)) = int16x8_t; -using vec_i8x16_t __attribute__((may_alias)) = int8x16_t; -using vec_u16x8_t __attribute__((may_alias)) = uint16x8_t; -using vec_i32x4_t __attribute__((may_alias)) = int32x4_t; - -using vec_t __attribute__((may_alias)) = int16x8_t; -using vec_i8_t __attribute__((may_alias)) = int8x16_t; -using psqt_vec_t __attribute__((may_alias)) = int32x4_t; -using vec128_t __attribute__((may_alias)) = uint16x8_t; -using vec_uint_t __attribute__((may_alias)) = uint32x4_t; - #define vec_load(a) (*(a)) - #define vec_store(a, b) *(a) = (b) - #define vec_add_16(a, b) vaddq_s16(a, b) - #define vec_sub_16(a, b) vsubq_s16(a, b) - #define vec_mulhi_16(a, b) vqdmulhq_s16(a, b) - #define vec_zero() vec_t{0} - #define vec_set_16(a) vdupq_n_s16(a) - #define vec_max_16(a, b) vmaxq_s16(a, b) - #define vec_min_16(a, b) vminq_s16(a, b) - #define vec_slli_16(a, b) vshlq_s16(a, vec_set_16(b)) - #define vec_packus_16(a, b) 
reinterpret_cast(vcombine_u8(vqmovun_s16(a), vqmovun_s16(b))) - #define vec_load_psqt(a) (*(a)) - #define vec_store_psqt(a, b) *(a) = (b) - #define vec_add_psqt_32(a, b) vaddq_s32(a, b) - #define vec_sub_psqt_32(a, b) vsubq_s32(a, b) - #define vec_zero_psqt() psqt_vec_t{0} - -static constexpr std::uint32_t Mask[4] = {1, 2, 4, 8}; - #define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask))) - #define vec128_zero vdupq_n_u16(0) - #define vec128_set_16(a) vdupq_n_u16(a) - #define vec128_load(a) vld1q_u16(reinterpret_cast(a)) - #define vec128_storeu(a, b) vst1q_u16(reinterpret_cast(a), b) - #define vec128_add(a, b) vaddq_u16(a, b) - - #define NumRegistersSIMD 16 - #define MaxChunkSize 16 - - #ifndef __aarch64__ -// Single instruction doesn't exist on 32-bit ARM -inline int16x8_t vmovl_high_s8(int8x16_t val) { return vmovl_s8(vget_high_s8(val)); } - #endif - -#else - #undef VECTOR - -#endif - -struct Vec16Wrapper { -#ifdef VECTOR - using type = vec_t; - static type add(const type& lhs, const type& rhs) { return vec_add_16(lhs, rhs); } - static type sub(const type& lhs, const type& rhs) { return vec_sub_16(lhs, rhs); } -#else - using type = BiasType; - static type add(const type& lhs, const type& rhs) { return lhs + rhs; } - static type sub(const type& lhs, const type& rhs) { return lhs - rhs; } -#endif -}; - -struct Vec32Wrapper { -#ifdef VECTOR - using type = psqt_vec_t; - static type add(const type& lhs, const type& rhs) { return vec_add_psqt_32(lhs, rhs); } - static type sub(const type& lhs, const type& rhs) { return vec_sub_psqt_32(lhs, rhs); } -#else - using type = PSQTWeightType; - static type add(const type& lhs, const type& rhs) { return lhs + rhs; } - static type sub(const type& lhs, const type& rhs) { return lhs - rhs; } -#endif -}; - -enum UpdateOperation { - Add, - Sub -}; - -template = true> -typename VecWrapper::type fused(const typename VecWrapper::type& in) { - return in; -} - -template, bool> = true, - std::enable_if_t = true> -typename 
VecWrapper::type -fused(const typename VecWrapper::type& in, const T& operand, const Ts&... operands) { - switch (update_op) - { - case Add : - return fused(VecWrapper::add(in, operand), operands...); - case Sub : - return fused(VecWrapper::sub(in, operand), operands...); - default : - static_assert(update_op == Add || update_op == Sub, - "Only Add and Sub are currently supported."); - return typename VecWrapper::type(); - } -} - -#if defined(USE_AVX512) - -[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) { - return _mm512_reduce_add_epi32(sum) + bias; -} - -[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) { - - #if defined(USE_VNNI) - acc = _mm512_dpbusd_epi32(acc, a, b); - #else - __m512i product0 = _mm512_maddubs_epi16(a, b); - product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); - acc = _mm512_add_epi32(acc, product0); - #endif -} - -#endif - -#if defined(USE_AVX2) - -[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) { - __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); - sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); - sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); - return _mm_cvtsi128_si32(sum128) + bias; -} - -[[maybe_unused]] static void m256_add_dpbusd_epi32(__m256i& acc, __m256i a, __m256i b) { - - #if defined(USE_VNNI) - acc = _mm256_dpbusd_epi32(acc, a, b); - #else - __m256i product0 = _mm256_maddubs_epi16(a, b); - product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); - acc = _mm256_add_epi32(acc, product0); - #endif -} - -#endif - -#if defined(USE_SSSE3) - -[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) { - sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC - sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB - return _mm_cvtsi128_si32(sum) + bias; -} - -[[maybe_unused]] static void m128_add_dpbusd_epi32(__m128i& acc, 
__m128i a, __m128i b) { - - __m128i product0 = _mm_maddubs_epi16(a, b); - product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); - acc = _mm_add_epi32(acc, product0); -} - -#endif - -#if defined(USE_NEON_DOTPROD) - -[[maybe_unused]] static void -dotprod_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) { - - acc = vdotq_s32(acc, a, b); -} -#endif - -#if defined(USE_NEON) - -[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { - #if USE_NEON >= 8 - return vaddvq_s32(s); - #else - return s[0] + s[1] + s[2] + s[3]; - #endif -} - -[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) { - return neon_m128_reduce_add_epi32(sum) + bias; -} - -#endif - -#if USE_NEON >= 8 -[[maybe_unused]] static void neon_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) { - - int16x8_t product0 = vmull_s8(vget_low_s8(a), vget_low_s8(b)); - int16x8_t product1 = vmull_high_s8(a, b); - int16x8_t sum = vpaddq_s16(product0, product1); - acc = vpadalq_s16(acc, sum); -} -#endif - - -// Compute optimal SIMD register count for feature transformer accumulation. -template -class SIMDTiling { -#ifdef VECTOR - // We use __m* types as template arguments, which causes GCC to emit warnings - // about losing some attribute information. This is irrelevant to us as we - // only take their size, so the following pragma are harmless. 
- #if defined(__GNUC__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wignored-attributes" - #endif - - template - static constexpr int BestRegisterCount() { - constexpr std::size_t RegisterSize = sizeof(SIMDRegisterType); - constexpr std::size_t LaneSize = sizeof(LaneType); - - static_assert(RegisterSize >= LaneSize); - static_assert(MaxRegisters <= NumRegistersSIMD); - static_assert(MaxRegisters > 0); - static_assert(NumRegistersSIMD > 0); - static_assert(RegisterSize % LaneSize == 0); - static_assert((NumLanes * LaneSize) % RegisterSize == 0); - - const int ideal = (NumLanes * LaneSize) / RegisterSize; - if (ideal <= MaxRegisters) - return ideal; - - // Look for the largest divisor of the ideal register count that is smaller than MaxRegisters - for (int divisor = MaxRegisters; divisor > 1; --divisor) - if (ideal % divisor == 0) - return divisor; - - return 1; - } - - #if defined(__GNUC__) - #pragma GCC diagnostic pop - #endif - - public: - static constexpr int NumRegs = - BestRegisterCount(); - static constexpr int NumPsqtRegs = - BestRegisterCount(); - - static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; - static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4; - - static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions"); - static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets"); -#endif -}; -} - -#endif diff --git a/src/numa.h b/src/numa.h deleted file mode 100644 index afd868dd085ce26f15c198fc47dbc323abf019a8..0000000000000000000000000000000000000000 --- a/src/numa.h +++ /dev/null @@ -1,1718 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the 
License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef NUMA_H_INCLUDED -#define NUMA_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "shm.h" - -// We support linux very well, but we explicitly do NOT support Android, -// because there is no affected systems, not worth maintaining. -#if defined(__linux__) && !defined(__ANDROID__) - #if !defined(_GNU_SOURCE) - #define _GNU_SOURCE - #endif - #include -#elif defined(_WIN64) - - #if _WIN32_WINNT < 0x0601 - #undef _WIN32_WINNT - #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes - #endif - -// On Windows each processor group can have up to 64 processors. 
-// https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups -static constexpr size_t WIN_PROCESSOR_GROUP_SIZE = 64; - - #if !defined(NOMINMAX) - #define NOMINMAX - #endif - #include - #if defined small - #undef small - #endif - -// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadselectedcpusetmasks -using SetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT); - -// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getthreadselectedcpusetmasks -using GetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT, PUSHORT); - -#endif - -#include "misc.h" - -namespace Stockfish { - -using CpuIndex = size_t; -using NumaIndex = size_t; - -inline CpuIndex get_hardware_concurrency() { - CpuIndex concurrency = std::thread::hardware_concurrency(); - - // Get all processors across all processor groups on windows, since - // hardware_concurrency() only returns the number of processors in - // the first group, because only these are available to std::thread. -#ifdef _WIN64 - concurrency = std::max(concurrency, GetActiveProcessorCount(ALL_PROCESSOR_GROUPS)); -#endif - - return concurrency; -} - -inline const CpuIndex SYSTEM_THREADS_NB = std::max(1, get_hardware_concurrency()); - -#if defined(_WIN64) - -struct WindowsAffinity { - std::optional> oldApi; - std::optional> newApi; - - // We also provide diagnostic for when the affinity is set to nullopt - // whether it was due to being indeterminate. If affinity is indeterminate - // it is best to assume it is not set at all, so consistent with the meaning - // of the nullopt affinity. 
- bool isNewDeterminate = true; - bool isOldDeterminate = true; - - std::optional> get_combined() const { - if (!oldApi.has_value()) - return newApi; - if (!newApi.has_value()) - return oldApi; - - std::set intersect; - std::set_intersection(oldApi->begin(), oldApi->end(), newApi->begin(), newApi->end(), - std::inserter(intersect, intersect.begin())); - return intersect; - } - - // Since Windows 11 and Windows Server 2022 thread affinities can span - // processor groups and can be set as such by a new WinAPI function. However, - // we may need to force using the old API if we detect that the process has - // affinity set by the old API already and we want to override that. Due to the - // limitations of the old API we cannot detect its use reliably. There will be - // cases where we detect not use but it has actually been used and vice versa. - - bool likely_used_old_api() const { return oldApi.has_value() || !isOldDeterminate; } -}; - -inline std::pair> get_process_group_affinity() { - - // GetProcessGroupAffinity requires the GroupArray argument to be - // aligned to 4 bytes instead of just 2. - static constexpr size_t GroupArrayMinimumAlignment = 4; - static_assert(GroupArrayMinimumAlignment >= alignof(USHORT)); - - // The function should succeed the second time, but it may fail if the group - // affinity has changed between GetProcessGroupAffinity calls. In such case - // we consider this a hard error, as we Cannot work with unstable affinities - // anyway. 
- static constexpr int MAX_TRIES = 2; - USHORT GroupCount = 1; - for (int i = 0; i < MAX_TRIES; ++i) - { - auto GroupArray = std::make_unique( - GroupCount + (GroupArrayMinimumAlignment / alignof(USHORT) - 1)); - - USHORT* GroupArrayAligned = align_ptr_up(GroupArray.get()); - - const BOOL status = - GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, GroupArrayAligned); - - if (status == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) - { - break; - } - - if (status != 0) - { - return std::make_pair(status, - std::vector(GroupArrayAligned, GroupArrayAligned + GroupCount)); - } - } - - return std::make_pair(0, std::vector()); -} - -// On Windows there are two ways to set affinity, and therefore 2 ways to get it. -// These are not consistent, so we have to check both. In some cases it is actually -// not possible to determine affinity. For example when two different threads have -// affinity on different processor groups, set using SetThreadAffinityMask, we cannot -// retrieve the actual affinities. -// From documentation on GetProcessAffinityMask: -// > If the calling process contains threads in multiple groups, -// > the function returns zero for both affinity masks. -// In such cases we just give up and assume we have affinity for all processors. -// nullopt means no affinity is set, that is, all processors are allowed -inline WindowsAffinity get_process_affinity() { - HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto GetThreadSelectedCpuSetMasks_f = GetThreadSelectedCpuSetMasks_t( - (void (*)()) GetProcAddress(k32, "GetThreadSelectedCpuSetMasks")); - - BOOL status = 0; - - WindowsAffinity affinity; - - if (GetThreadSelectedCpuSetMasks_f != nullptr) - { - USHORT RequiredMaskCount; - status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), nullptr, 0, &RequiredMaskCount); - - // We expect ERROR_INSUFFICIENT_BUFFER from GetThreadSelectedCpuSetMasks, - // but other failure is an actual error. 
- if (status == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) - { - affinity.isNewDeterminate = false; - } - else if (RequiredMaskCount > 0) - { - // If RequiredMaskCount then these affinities were never set, but it's - // not consistent so GetProcessAffinityMask may still return some affinity. - auto groupAffinities = std::make_unique(RequiredMaskCount); - - status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(), - RequiredMaskCount, &RequiredMaskCount); - - if (status == 0) - { - affinity.isNewDeterminate = false; - } - else - { - std::set cpus; - - for (USHORT i = 0; i < RequiredMaskCount; ++i) - { - const size_t procGroupIndex = groupAffinities[i].Group; - - for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) - { - if (groupAffinities[i].Mask & (KAFFINITY(1) << j)) - cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); - } - } - - affinity.newApi = std::move(cpus); - } - } - } - - // NOTE: There is no way to determine full affinity using the old API if - // individual threads set affinity on different processor groups. - - DWORD_PTR proc, sys; - status = GetProcessAffinityMask(GetCurrentProcess(), &proc, &sys); - - // If proc == 0 then we cannot determine affinity because it spans processor groups. - // On Windows 11 and Server 2022 it will instead - // > If, however, hHandle specifies a handle to the current process, the function - // > always uses the calling thread's primary group (which by default is the same - // > as the process' primary group) in order to set the - // > lpProcessAffinityMask and lpSystemAffinityMask. - // So it will never be indeterminate here. We can only make assumptions later. - if (status == 0 || proc == 0) - { - affinity.isOldDeterminate = false; - return affinity; - } - - // If SetProcessAffinityMask was never called the affinity must span - // all processor groups, but if it was called it must only span one. 
- - std::vector groupAffinity; // We need to capture this later and capturing - // from structured bindings requires c++20. - - std::tie(status, groupAffinity) = get_process_group_affinity(); - if (status == 0) - { - affinity.isOldDeterminate = false; - return affinity; - } - - if (groupAffinity.size() == 1) - { - // We detect the case when affinity is set to all processors and correctly - // leave affinity.oldApi as nullopt. - if (GetActiveProcessorGroupCount() != 1 || proc != sys) - { - std::set cpus; - - const size_t procGroupIndex = groupAffinity[0]; - - const uint64_t mask = static_cast(proc); - for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) - { - if (mask & (KAFFINITY(1) << j)) - cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); - } - - affinity.oldApi = std::move(cpus); - } - } - else - { - // If we got here it means that either SetProcessAffinityMask was never set - // or we're on Windows 11/Server 2022. - - // Since Windows 11 and Windows Server 2022 the behaviour of - // GetProcessAffinityMask changed: - // > If, however, hHandle specifies a handle to the current process, - // > the function always uses the calling thread's primary group - // > (which by default is the same as the process' primary group) - // > in order to set the lpProcessAffinityMask and lpSystemAffinityMask. - // In which case we can actually retrieve the full affinity. - - if (GetThreadSelectedCpuSetMasks_f != nullptr) - { - std::thread th([&]() { - std::set cpus; - bool isAffinityFull = true; - - for (auto procGroupIndex : groupAffinity) - { - const int numActiveProcessors = - GetActiveProcessorCount(static_cast(procGroupIndex)); - - // We have to schedule to two different processors - // and & the affinities we get. Otherwise our processor - // choice could influence the resulting affinity. - // We assume the processor IDs within the group are - // filled sequentially from 0. 
- uint64_t procCombined = std::numeric_limits::max(); - uint64_t sysCombined = std::numeric_limits::max(); - - for (int i = 0; i < std::min(numActiveProcessors, 2); ++i) - { - GROUP_AFFINITY GroupAffinity; - std::memset(&GroupAffinity, 0, sizeof(GROUP_AFFINITY)); - GroupAffinity.Group = static_cast(procGroupIndex); - - GroupAffinity.Mask = static_cast(1) << i; - - status = - SetThreadGroupAffinity(GetCurrentThread(), &GroupAffinity, nullptr); - if (status == 0) - { - affinity.isOldDeterminate = false; - return; - } - - SwitchToThread(); - - DWORD_PTR proc2, sys2; - status = GetProcessAffinityMask(GetCurrentProcess(), &proc2, &sys2); - if (status == 0) - { - affinity.isOldDeterminate = false; - return; - } - - procCombined &= static_cast(proc2); - sysCombined &= static_cast(sys2); - } - - if (procCombined != sysCombined) - isAffinityFull = false; - - for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) - { - if (procCombined & (KAFFINITY(1) << j)) - cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); - } - } - - // We have to detect the case where the affinity was not set, - // or is set to all processors so that we correctly produce as - // std::nullopt result. 
- if (!isAffinityFull) - { - affinity.oldApi = std::move(cpus); - } - }); - - th.join(); - } - } - - return affinity; -} - -// Type machinery used to emulate Cache->GroupCount - -template -struct HasGroupCount: std::false_type {}; - -template -struct HasGroupCount().Cache.GroupCount)>>: std::true_type { -}; - -template::value, bool> = true> -std::set readCacheMembers(const T* info, Pred&& is_cpu_allowed) { - std::set cpus; - // On Windows 10 this will read a 0 because GroupCount doesn't exist - int groupCount = std::max(info->Cache.GroupCount, WORD(1)); - for (WORD procGroup = 0; procGroup < groupCount; ++procGroup) - { - for (BYTE number = 0; number < WIN_PROCESSOR_GROUP_SIZE; ++number) - { - WORD groupNumber = info->Cache.GroupMasks[procGroup].Group; - const CpuIndex c = static_cast(groupNumber) * WIN_PROCESSOR_GROUP_SIZE - + static_cast(number); - if (!(info->Cache.GroupMasks[procGroup].Mask & (1ULL << number)) || !is_cpu_allowed(c)) - continue; - cpus.insert(c); - } - } - return cpus; -} - -template::value, bool> = true> -std::set readCacheMembers(const T* info, Pred&& is_cpu_allowed) { - std::set cpus; - for (BYTE number = 0; number < WIN_PROCESSOR_GROUP_SIZE; ++number) - { - WORD groupNumber = info->Cache.GroupMask.Group; - const CpuIndex c = static_cast(groupNumber) * WIN_PROCESSOR_GROUP_SIZE - + static_cast(number); - if (!(info->Cache.GroupMask.Mask & (1ULL << number)) || !is_cpu_allowed(c)) - continue; - cpus.insert(c); - } - return cpus; -} - -#endif - -#if defined(__linux__) && !defined(__ANDROID__) - -inline std::set get_process_affinity() { - - std::set cpus; - - // For unsupported systems, or in case of a soft error, we may assume - // all processors are available for use. - [[maybe_unused]] auto set_to_all_cpus = [&]() { - for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) - cpus.insert(c); - }; - - // cpu_set_t by default holds 1024 entries. 
This may not be enough soon, - // but there is no easy way to determine how many threads there actually - // is. In this case we just choose a reasonable upper bound. - static constexpr CpuIndex MaxNumCpus = 1024 * 64; - - cpu_set_t* mask = CPU_ALLOC(MaxNumCpus); - if (mask == nullptr) - std::exit(EXIT_FAILURE); - - const size_t masksize = CPU_ALLOC_SIZE(MaxNumCpus); - - CPU_ZERO_S(masksize, mask); - - const int status = sched_getaffinity(0, masksize, mask); - - if (status != 0) - { - CPU_FREE(mask); - std::exit(EXIT_FAILURE); - } - - for (CpuIndex c = 0; c < MaxNumCpus; ++c) - if (CPU_ISSET_S(c, masksize, mask)) - cpus.insert(c); - - CPU_FREE(mask); - - return cpus; -} - -#endif - -#if defined(__linux__) && !defined(__ANDROID__) - -inline static const auto STARTUP_PROCESSOR_AFFINITY = get_process_affinity(); - -#elif defined(_WIN64) - -inline static const auto STARTUP_PROCESSOR_AFFINITY = get_process_affinity(); -inline static const auto STARTUP_USE_OLD_AFFINITY_API = - STARTUP_PROCESSOR_AFFINITY.likely_used_old_api(); - -#endif - -// We want to abstract the purpose of storing the numa node index somewhat. -// Whoever is using this does not need to know the specifics of the replication -// machinery to be able to access NUMA replicated memory. 
-class NumaReplicatedAccessToken { - public: - NumaReplicatedAccessToken() : - n(0) {} - - explicit NumaReplicatedAccessToken(NumaIndex idx) : - n(idx) {} - - NumaIndex get_numa_index() const { return n; } - - private: - NumaIndex n; -}; - -struct L3Domain { - NumaIndex systemNumaIndex{}; - std::set cpus{}; -}; - -// Use system NUMA nodes -struct SystemNumaPolicy {}; -// Use system-reported L3 domains -struct L3DomainsPolicy {}; -// Group system-reported L3 domains until they reach bundleSize -struct BundledL3Policy { - size_t bundleSize; -}; - -using NumaAutoPolicy = std::variant; - -// Designed as immutable, because there is no good reason to alter an already -// existing config in a way that doesn't require recreating it completely, and -// it would be complex and expensive to maintain class invariants. -// The CPU (processor) numbers always correspond to the actual numbering used -// by the system. The NUMA node numbers MAY NOT correspond to the system's -// numbering of the NUMA nodes. In particular, by default, if the processor has -// non-uniform cache access within a NUMA node (i.e., a non-unified L3 cache structure), -// then L3 domains within a system NUMA node will be used to subdivide it -// into multiple logical NUMA nodes in the config. Additionally, empty nodes may -// be removed, or the user may create custom nodes. -// -// As a special case, when performing system-wide replication of read-only data -// (i.e., LazyNumaReplicatedSystemWide), the system NUMA node is used, rather than -// custom or L3-aware nodes. See that class's get_discriminator() function. -// -// It is guaranteed that NUMA nodes are NOT empty: every node exposed by NumaConfig -// has at least one processor assigned. -// -// We use startup affinities so as not to modify its own behaviour in time. -// -// Since Stockfish doesn't support exceptions all places where an exception -// should be thrown are replaced by std::exit. 
-class NumaConfig { - public: - NumaConfig() : - highestCpuIndex(0), - customAffinity(false) { - const auto numCpus = SYSTEM_THREADS_NB; - add_cpu_range_to_node(NumaIndex{0}, CpuIndex{0}, numCpus - 1); - } - - // This function gets a NumaConfig based on the system's provided information. - // The available policies are documented above. - static NumaConfig from_system([[maybe_unused]] const NumaAutoPolicy& policy, - bool respectProcessAffinity = true) { - NumaConfig cfg = empty(); - -#if !((defined(__linux__) && !defined(__ANDROID__)) || defined(_WIN64)) - // Fallback for unsupported systems. - for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) - cfg.add_cpu_to_node(NumaIndex{0}, c); -#else - - #if defined(_WIN64) - - std::optional> allowedCpus; - - if (respectProcessAffinity) - allowedCpus = STARTUP_PROCESSOR_AFFINITY.get_combined(); - - // The affinity cannot be determined in all cases on Windows, - // but we at least guarantee that the number of allowed processors - // is >= number of processors in the affinity mask. In case the user - // is not satisfied they must set the processor numbers explicitly. 
- auto is_cpu_allowed = [&allowedCpus](CpuIndex c) { - return !allowedCpus.has_value() || allowedCpus->count(c) == 1; - }; - - #elif defined(__linux__) && !defined(__ANDROID__) - - std::set allowedCpus; - - if (respectProcessAffinity) - allowedCpus = STARTUP_PROCESSOR_AFFINITY; - - auto is_cpu_allowed = [respectProcessAffinity, &allowedCpus](CpuIndex c) { - return !respectProcessAffinity || allowedCpus.count(c) == 1; - }; - - #endif - - bool l3Success = false; - if (!std::holds_alternative(policy)) - { - size_t l3BundleSize = 0; - if (const auto* v = std::get_if(&policy)) - { - l3BundleSize = v->bundleSize; - } - if (auto l3Cfg = - try_get_l3_aware_config(respectProcessAffinity, l3BundleSize, is_cpu_allowed)) - { - cfg = std::move(*l3Cfg); - l3Success = true; - } - } - if (!l3Success) - cfg = from_system_numa(respectProcessAffinity, is_cpu_allowed); - - #if defined(_WIN64) - // Split the NUMA nodes to be contained within a group if necessary. - // This is needed between Windows 10 Build 20348 and Windows 11, because - // the new NUMA allocation behaviour was introduced while there was - // still no way to set thread affinity spanning multiple processor groups. - // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support - // We also do this is if need to force old API for some reason. - // - // 2024-08-26: It appears that we need to actually always force this behaviour. - // While Windows allows this to work now, such assignments have bad interaction - // with the scheduler - in particular it still prefers scheduling on the thread's - // "primary" node, even if it means scheduling SMT processors first. - // See https://github.com/official-stockfish/Stockfish/issues/5551 - // See https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups - // - // Each process is assigned a primary group at creation, and by default all - // of its threads' primary group is the same. 
Each thread's ideal processor - // is in the thread's primary group, so threads will preferentially be - // scheduled to processors on their primary group, but they are able to - // be scheduled to processors on any other group. - // - // used to be guarded by if (STARTUP_USE_OLD_AFFINITY_API) - { - NumaConfig splitCfg = empty(); - - NumaIndex splitNodeIndex = 0; - for (const auto& cpus : cfg.nodes) - { - if (cpus.empty()) - continue; - - size_t lastProcGroupIndex = *(cpus.begin()) / WIN_PROCESSOR_GROUP_SIZE; - for (CpuIndex c : cpus) - { - const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; - if (procGroupIndex != lastProcGroupIndex) - { - splitNodeIndex += 1; - lastProcGroupIndex = procGroupIndex; - } - splitCfg.add_cpu_to_node(splitNodeIndex, c); - } - splitNodeIndex += 1; - } - - cfg = std::move(splitCfg); - } - #endif - -#endif - - // We have to ensure no empty NUMA nodes persist. - cfg.remove_empty_numa_nodes(); - - // If the user explicitly opts out from respecting the current process affinity - // then it may be inconsistent with the current affinity (obviously), so we - // consider it custom. 
- if (!respectProcessAffinity) - cfg.customAffinity = true; - - return cfg; - } - - // ':'-separated numa nodes - // ','-separated cpu indices - // supports "first-last" range syntax for cpu indices - // For example "0-15,128-143:16-31,144-159:32-47,160-175:48-63,176-191" - static NumaConfig from_string(const std::string& s) { - NumaConfig cfg = empty(); - - NumaIndex n = 0; - for (auto&& nodeStr : split(s, ":")) - { - auto indices = indices_from_shortened_string(std::string(nodeStr)); - if (!indices.empty()) - { - for (auto idx : indices) - { - if (!cfg.add_cpu_to_node(n, CpuIndex(idx))) - std::exit(EXIT_FAILURE); - } - - n += 1; - } - } - - cfg.customAffinity = true; - - return cfg; - } - - NumaConfig(const NumaConfig&) = delete; - NumaConfig(NumaConfig&&) = default; - NumaConfig& operator=(const NumaConfig&) = delete; - NumaConfig& operator=(NumaConfig&&) = default; - - bool is_cpu_assigned(CpuIndex n) const { return nodeByCpu.count(n) == 1; } - - NumaIndex num_numa_nodes() const { return nodes.size(); } - - CpuIndex num_cpus_in_numa_node(NumaIndex n) const { - assert(n < nodes.size()); - return nodes[n].size(); - } - - CpuIndex num_cpus() const { return nodeByCpu.size(); } - - bool requires_memory_replication() const { return customAffinity || nodes.size() > 1; } - - std::string to_string() const { - std::string str; - - bool isFirstNode = true; - for (auto&& cpus : nodes) - { - if (!isFirstNode) - str += ":"; - - bool isFirstSet = true; - auto rangeStart = cpus.begin(); - for (auto it = cpus.begin(); it != cpus.end(); ++it) - { - auto next = std::next(it); - if (next == cpus.end() || *next != *it + 1) - { - // cpus[i] is at the end of the range (may be of size 1) - if (!isFirstSet) - str += ","; - - const CpuIndex last = *it; - - if (it != rangeStart) - { - const CpuIndex first = *rangeStart; - - str += std::to_string(first); - str += "-"; - str += std::to_string(last); - } - else - str += std::to_string(last); - - rangeStart = next; - isFirstSet = false; - } 
- } - - isFirstNode = false; - } - - return str; - } - - bool suggests_binding_threads(CpuIndex numThreads) const { - // If we can reasonably determine that the threads cannot be contained - // by the OS within the first NUMA node then we advise distributing - // and binding threads. When the threads are not bound we can only use - // NUMA memory replicated objects from the first node, so when the OS - // has to schedule on other nodes we lose performance. We also suggest - // binding if there's enough threads to distribute among nodes with minimal - // disparity. We try to ignore small nodes, in particular the empty ones. - - // If the affinity set by the user does not match the affinity given by - // the OS then binding is necessary to ensure the threads are running on - // correct processors. - if (customAffinity) - return true; - - // We obviously cannot distribute a single thread, so a single thread - // should never be bound. - if (numThreads <= 1) - return false; - - size_t largestNodeSize = 0; - for (auto&& cpus : nodes) - if (cpus.size() > largestNodeSize) - largestNodeSize = cpus.size(); - - auto is_node_small = [largestNodeSize](const std::set& node) { - static constexpr double SmallNodeThreshold = 0.6; - return static_cast(node.size()) / static_cast(largestNodeSize) - <= SmallNodeThreshold; - }; - - size_t numNotSmallNodes = 0; - for (auto&& cpus : nodes) - if (!is_node_small(cpus)) - numNotSmallNodes += 1; - - return (numThreads > largestNodeSize / 2 || numThreads >= numNotSmallNodes * 4) - && nodes.size() > 1; - } - - std::vector distribute_threads_among_numa_nodes(CpuIndex numThreads) const { - std::vector ns; - - if (nodes.size() == 1) - { - // Special case for when there's no NUMA nodes. This doesn't buy us - // much, but let's keep the default path simple. 
- ns.resize(numThreads, NumaIndex{0}); - } - else - { - std::vector occupation(nodes.size(), 0); - for (CpuIndex c = 0; c < numThreads; ++c) - { - NumaIndex bestNode{0}; - float bestNodeFill = std::numeric_limits::max(); - for (NumaIndex n = 0; n < nodes.size(); ++n) - { - float fill = - static_cast(occupation[n] + 1) / static_cast(nodes[n].size()); - // NOTE: Do we want to perhaps fill the first available node - // up to 50% first before considering other nodes? - // Probably not, because it would interfere with running - // multiple instances. We basically shouldn't favor any - // particular node. - if (fill < bestNodeFill) - { - bestNode = n; - bestNodeFill = fill; - } - } - ns.emplace_back(bestNode); - occupation[bestNode] += 1; - } - } - - return ns; - } - - NumaReplicatedAccessToken bind_current_thread_to_numa_node(NumaIndex n) const { - if (n >= nodes.size() || nodes[n].size() == 0) - std::exit(EXIT_FAILURE); - -#if defined(__linux__) && !defined(__ANDROID__) - - cpu_set_t* mask = CPU_ALLOC(highestCpuIndex + 1); - if (mask == nullptr) - std::exit(EXIT_FAILURE); - - const size_t masksize = CPU_ALLOC_SIZE(highestCpuIndex + 1); - - CPU_ZERO_S(masksize, mask); - - for (CpuIndex c : nodes[n]) - CPU_SET_S(c, masksize, mask); - - const int status = sched_setaffinity(0, masksize, mask); - - CPU_FREE(mask); - - if (status != 0) - std::exit(EXIT_FAILURE); - - // We yield this thread just to be sure it gets rescheduled. - // This is defensive, allowed because this code is not performance critical. - sched_yield(); - -#elif defined(_WIN64) - - // Requires Windows 11. No good way to set thread affinity spanning - // processor groups before that. 
- HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t( - (void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks")); - - // We ALWAYS set affinity with the new API if available, because - // there's no downsides, and we forcibly keep it consistent with - // the old API should we need to use it. I.e. we always keep this - // as a superset of what we set with SetThreadGroupAffinity. - if (SetThreadSelectedCpuSetMasks_f != nullptr) - { - // Only available on Windows 11 and Windows Server 2022 onwards - const USHORT numProcGroups = USHORT( - ((highestCpuIndex + 1) + WIN_PROCESSOR_GROUP_SIZE - 1) / WIN_PROCESSOR_GROUP_SIZE); - auto groupAffinities = std::make_unique(numProcGroups); - std::memset(groupAffinities.get(), 0, sizeof(GROUP_AFFINITY) * numProcGroups); - for (WORD i = 0; i < numProcGroups; ++i) - groupAffinities[i].Group = i; - - for (CpuIndex c : nodes[n]) - { - const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; - const size_t idxWithinProcGroup = c % WIN_PROCESSOR_GROUP_SIZE; - groupAffinities[procGroupIndex].Mask |= KAFFINITY(1) << idxWithinProcGroup; - } - - HANDLE hThread = GetCurrentThread(); - - const BOOL status = - SetThreadSelectedCpuSetMasks_f(hThread, groupAffinities.get(), numProcGroups); - if (status == 0) - std::exit(EXIT_FAILURE); - - // We yield this thread just to be sure it gets rescheduled. - // This is defensive, allowed because this code is not performance critical. - SwitchToThread(); - } - - // Sometimes we need to force the old API, but do not use it unless necessary. - if (SetThreadSelectedCpuSetMasks_f == nullptr || STARTUP_USE_OLD_AFFINITY_API) - { - // On earlier windows version (since windows 7) we cannot run a single thread - // on multiple processor groups, so we need to restrict the group. - // We assume the group of the first processor listed for this node. - // Processors from outside this group will not be assigned for this thread. 
- // Normally this won't be an issue because windows used to assign NUMA nodes - // such that they cannot span processor groups. However, since Windows 10 - // Build 20348 the behaviour changed, so there's a small window of versions - // between this and Windows 11 that might exhibit problems with not all - // processors being utilized. - // - // We handle this in NumaConfig::from_system by manually splitting the - // nodes when we detect that there is no function to set affinity spanning - // processor nodes. This is required because otherwise our thread distribution - // code may produce suboptimal results. - // - // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support - GROUP_AFFINITY affinity; - std::memset(&affinity, 0, sizeof(GROUP_AFFINITY)); - // We use an ordered set to be sure to get the smallest cpu number here. - const size_t forcedProcGroupIndex = *(nodes[n].begin()) / WIN_PROCESSOR_GROUP_SIZE; - affinity.Group = static_cast(forcedProcGroupIndex); - for (CpuIndex c : nodes[n]) - { - const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; - const size_t idxWithinProcGroup = c % WIN_PROCESSOR_GROUP_SIZE; - // We skip processors that are not in the same processor group. - // If everything was set up correctly this will never be an issue, - // but we have to account for bad NUMA node specification. - if (procGroupIndex != forcedProcGroupIndex) - continue; - - affinity.Mask |= KAFFINITY(1) << idxWithinProcGroup; - } - - HANDLE hThread = GetCurrentThread(); - - const BOOL status = SetThreadGroupAffinity(hThread, &affinity, nullptr); - if (status == 0) - std::exit(EXIT_FAILURE); - - // We yield this thread just to be sure it gets rescheduled. This is - // defensive, allowed because this code is not performance critical. 
- SwitchToThread(); - } - -#endif - - return NumaReplicatedAccessToken(n); - } - - template - void execute_on_numa_node(NumaIndex n, FuncT&& f) const { - std::thread th([this, &f, n]() { - bind_current_thread_to_numa_node(n); - std::forward(f)(); - }); - - th.join(); - } - - std::vector> nodes; - std::map nodeByCpu; - - private: - CpuIndex highestCpuIndex; - - bool customAffinity; - - static NumaConfig empty() { return NumaConfig(EmptyNodeTag{}); } - - struct EmptyNodeTag {}; - - NumaConfig(EmptyNodeTag) : - highestCpuIndex(0), - customAffinity(false) {} - - void remove_empty_numa_nodes() { - std::vector> newNodes; - for (auto&& cpus : nodes) - if (!cpus.empty()) - newNodes.emplace_back(std::move(cpus)); - nodes = std::move(newNodes); - } - - // Returns true if successful - // Returns false if failed, i.e. when the cpu is already present - // strong guarantee, the structure remains unmodified - bool add_cpu_to_node(NumaIndex n, CpuIndex c) { - if (is_cpu_assigned(c)) - return false; - - while (nodes.size() <= n) - nodes.emplace_back(); - - nodes[n].insert(c); - nodeByCpu[c] = n; - - if (c > highestCpuIndex) - highestCpuIndex = c; - - return true; - } - - // Returns true if successful - // Returns false if failed, i.e. 
when any of the cpus is already present - // strong guarantee, the structure remains unmodified - bool add_cpu_range_to_node(NumaIndex n, CpuIndex cfirst, CpuIndex clast) { - for (CpuIndex c = cfirst; c <= clast; ++c) - if (is_cpu_assigned(c)) - return false; - - while (nodes.size() <= n) - nodes.emplace_back(); - - for (CpuIndex c = cfirst; c <= clast; ++c) - { - nodes[n].insert(c); - nodeByCpu[c] = n; - } - - if (clast > highestCpuIndex) - highestCpuIndex = clast; - - return true; - } - - static std::vector indices_from_shortened_string(const std::string& s) { - std::vector indices; - - if (s.empty()) - return indices; - - for (const auto& ss : split(s, ",")) - { - if (ss.empty()) - continue; - - auto parts = split(ss, "-"); - if (parts.size() == 1) - { - const CpuIndex c = CpuIndex{str_to_size_t(std::string(parts[0]))}; - indices.emplace_back(c); - } - else if (parts.size() == 2) - { - const CpuIndex cfirst = CpuIndex{str_to_size_t(std::string(parts[0]))}; - const CpuIndex clast = CpuIndex{str_to_size_t(std::string(parts[1]))}; - for (size_t c = cfirst; c <= clast; ++c) - { - indices.emplace_back(c); - } - } - } - - return indices; - } - - // This function queries the system for the mapping of processors to NUMA nodes. - // On Linux we read from standardized kernel sysfs, with a fallback to single NUMA - // node. On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see - // comment for Windows implementation of get_process_affinity. - template - static NumaConfig from_system_numa([[maybe_unused]] bool respectProcessAffinity, - [[maybe_unused]] Pred&& is_cpu_allowed) { - NumaConfig cfg = empty(); - -#if defined(__linux__) && !defined(__ANDROID__) - - // On Linux things are straightforward, since there's no processor groups and - // any thread can be scheduled on all processors. 
- // We try to gather this information from the sysfs first - // https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node - - bool useFallback = false; - auto fallback = [&]() { - useFallback = true; - cfg = empty(); - }; - - // /sys/devices/system/node/online contains information about active NUMA nodes - auto nodeIdsStr = read_file_to_string("/sys/devices/system/node/online"); - if (!nodeIdsStr.has_value() || nodeIdsStr->empty()) - { - fallback(); - } - else - { - remove_whitespace(*nodeIdsStr); - for (size_t n : indices_from_shortened_string(*nodeIdsStr)) - { - // /sys/devices/system/node/node.../cpulist - std::string path = - std::string("/sys/devices/system/node/node") + std::to_string(n) + "/cpulist"; - auto cpuIdsStr = read_file_to_string(path); - // Now, we only bail if the file does not exist. Some nodes may be - // empty, that's fine. An empty node still has a file that appears - // to have some whitespace, so we need to handle that. - if (!cpuIdsStr.has_value()) - { - fallback(); - break; - } - else - { - remove_whitespace(*cpuIdsStr); - for (size_t c : indices_from_shortened_string(*cpuIdsStr)) - { - if (is_cpu_allowed(c)) - cfg.add_cpu_to_node(n, c); - } - } - } - } - - if (useFallback) - { - for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) - if (is_cpu_allowed(c)) - cfg.add_cpu_to_node(NumaIndex{0}, c); - } - -#elif defined(_WIN64) - - WORD numProcGroups = GetActiveProcessorGroupCount(); - for (WORD procGroup = 0; procGroup < numProcGroups; ++procGroup) - { - for (BYTE number = 0; number < WIN_PROCESSOR_GROUP_SIZE; ++number) - { - PROCESSOR_NUMBER procnum; - procnum.Group = procGroup; - procnum.Number = number; - procnum.Reserved = 0; - USHORT nodeNumber; - - const BOOL status = GetNumaProcessorNodeEx(&procnum, &nodeNumber); - const CpuIndex c = static_cast(procGroup) * WIN_PROCESSOR_GROUP_SIZE - + static_cast(number); - if (status != 0 && nodeNumber != std::numeric_limits::max() - && is_cpu_allowed(c)) - { - 
cfg.add_cpu_to_node(nodeNumber, c); - } - } - } - -#else - - abort(); // should not reach here - -#endif - - return cfg; - } - - template - static std::optional try_get_l3_aware_config( - bool respectProcessAffinity, size_t bundleSize, [[maybe_unused]] Pred&& is_cpu_allowed) { - // Get the normal system configuration so we know to which NUMA node - // each L3 domain belongs. - NumaConfig systemConfig = - NumaConfig::from_system(SystemNumaPolicy{}, respectProcessAffinity); - std::vector l3Domains; - -#if defined(__linux__) && !defined(__ANDROID__) - - std::set seenCpus; - auto nextUnseenCpu = [&seenCpus]() { - for (CpuIndex i = 0;; ++i) - if (!seenCpus.count(i)) - return i; - }; - - while (true) - { - CpuIndex next = nextUnseenCpu(); - auto siblingsStr = - read_file_to_string("/sys/devices/system/cpu/cpu" + std::to_string(next) - + "/cache/index3/shared_cpu_list"); - - if (!siblingsStr.has_value() || siblingsStr->empty()) - { - break; // we have read all available CPUs - } - - L3Domain domain; - for (size_t c : indices_from_shortened_string(*siblingsStr)) - { - if (is_cpu_allowed(c)) - { - domain.systemNumaIndex = systemConfig.nodeByCpu.at(c); - domain.cpus.insert(c); - } - seenCpus.insert(c); - } - if (!domain.cpus.empty()) - { - l3Domains.emplace_back(std::move(domain)); - } - } - -#elif defined(_WIN64) - - DWORD bufSize = 0; - GetLogicalProcessorInformationEx(RelationCache, nullptr, &bufSize); - if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) - return std::nullopt; - - std::vector buffer(bufSize); - auto info = reinterpret_cast(buffer.data()); - if (!GetLogicalProcessorInformationEx(RelationCache, info, &bufSize)) - return std::nullopt; - - while (reinterpret_cast(info) < buffer.data() + bufSize) - { - info = std::launder(info); - if (info->Relationship == RelationCache && info->Cache.Level == 3) - { - L3Domain domain{}; - domain.cpus = readCacheMembers(info, is_cpu_allowed); - if (!domain.cpus.empty()) - { - domain.systemNumaIndex = 
systemConfig.nodeByCpu.at(*domain.cpus.begin()); - l3Domains.push_back(std::move(domain)); - } - } - // Variable length data structure, advance to next - info = reinterpret_cast( - reinterpret_cast(info) + info->Size); - } -#endif - - if (!l3Domains.empty()) - return {NumaConfig::from_l3_info(std::move(l3Domains), bundleSize)}; - - return std::nullopt; - } - - - static NumaConfig from_l3_info(std::vector&& domains, size_t bundleSize) { - assert(!domains.empty()); - - std::map> list; - for (auto& d : domains) - list[d.systemNumaIndex].emplace_back(std::move(d)); - - NumaConfig cfg = empty(); - NumaIndex n = 0; - for (auto& [_, ds] : list) - { - bool changed; - // Scan through pairs and merge them. With roughly equal L3 sizes, should give - // a decent distribution. - do - { - changed = false; - for (size_t j = 0; j + 1 < ds.size(); ++j) - { - if (ds[j].cpus.size() + ds[j + 1].cpus.size() <= bundleSize) - { - changed = true; - ds[j].cpus.merge(ds[j + 1].cpus); - ds.erase(ds.begin() + j + 1); - } - } - // ds.size() has decreased if changed is true, so this loop will terminate - } while (changed); - for (const L3Domain& d : ds) - { - const NumaIndex dn = n++; - for (CpuIndex cpu : d.cpus) - { - cfg.add_cpu_to_node(dn, cpu); - } - } - } - return cfg; - } -}; - -class NumaReplicationContext; - -// Instances of this class are tracked by the NumaReplicationContext instance. -// NumaReplicationContext informs all tracked instances when NUMA configuration changes. 
-class NumaReplicatedBase { - public: - NumaReplicatedBase(NumaReplicationContext& ctx); - - NumaReplicatedBase(const NumaReplicatedBase&) = delete; - NumaReplicatedBase(NumaReplicatedBase&& other) noexcept; - - NumaReplicatedBase& operator=(const NumaReplicatedBase&) = delete; - NumaReplicatedBase& operator=(NumaReplicatedBase&& other) noexcept; - - virtual void on_numa_config_changed() = 0; - virtual ~NumaReplicatedBase(); - - const NumaConfig& get_numa_config() const; - - private: - NumaReplicationContext* context; -}; - -// We force boxing with a unique_ptr. If this becomes an issue due to added -// indirection we may need to add an option for a custom boxing type. When the -// NUMA config changes the value stored at the index 0 is replicated to other nodes. -template -class NumaReplicated: public NumaReplicatedBase { - public: - using ReplicatorFuncType = std::function; - - NumaReplicated(NumaReplicationContext& ctx) : - NumaReplicatedBase(ctx) { - replicate_from(T{}); - } - - NumaReplicated(NumaReplicationContext& ctx, T&& source) : - NumaReplicatedBase(ctx) { - replicate_from(std::move(source)); - } - - NumaReplicated(const NumaReplicated&) = delete; - NumaReplicated(NumaReplicated&& other) noexcept : - NumaReplicatedBase(std::move(other)), - instances(std::exchange(other.instances, {})) {} - - NumaReplicated& operator=(const NumaReplicated&) = delete; - NumaReplicated& operator=(NumaReplicated&& other) noexcept { - NumaReplicatedBase::operator=(*this, std::move(other)); - instances = std::exchange(other.instances, {}); - - return *this; - } - - NumaReplicated& operator=(T&& source) { - replicate_from(std::move(source)); - - return *this; - } - - ~NumaReplicated() override = default; - - const T& operator[](NumaReplicatedAccessToken token) const { - assert(token.get_numa_index() < instances.size()); - return *(instances[token.get_numa_index()]); - } - - const T& operator*() const { return *(instances[0]); } - - const T* operator->() const { return 
instances[0].get(); } - - template - void modify_and_replicate(FuncT&& f) { - auto source = std::move(instances[0]); - std::forward(f)(*source); - replicate_from(std::move(*source)); - } - - void on_numa_config_changed() override { - // Use the first one as the source. It doesn't matter which one we use, - // because they all must be identical, but the first one is guaranteed to exist. - auto source = std::move(instances[0]); - replicate_from(std::move(*source)); - } - - private: - std::vector> instances; - - void replicate_from(T&& source) { - instances.clear(); - - const NumaConfig& cfg = get_numa_config(); - if (cfg.requires_memory_replication()) - { - for (NumaIndex n = 0; n < cfg.num_numa_nodes(); ++n) - { - cfg.execute_on_numa_node( - n, [this, &source]() { instances.emplace_back(std::make_unique(source)); }); - } - } - else - { - assert(cfg.num_numa_nodes() == 1); - // We take advantage of the fact that replication is not required - // and reuse the source value, avoiding one copy operation. - instances.emplace_back(std::make_unique(std::move(source))); - } - } -}; - -// We force boxing with a unique_ptr. If this becomes an issue due to added -// indirection we may need to add an option for a custom boxing type. 
-template -class LazyNumaReplicated: public NumaReplicatedBase { - public: - using ReplicatorFuncType = std::function; - - LazyNumaReplicated(NumaReplicationContext& ctx) : - NumaReplicatedBase(ctx) { - prepare_replicate_from(T{}); - } - - LazyNumaReplicated(NumaReplicationContext& ctx, T&& source) : - NumaReplicatedBase(ctx) { - prepare_replicate_from(std::move(source)); - } - - LazyNumaReplicated(const LazyNumaReplicated&) = delete; - LazyNumaReplicated(LazyNumaReplicated&& other) noexcept : - NumaReplicatedBase(std::move(other)), - instances(std::exchange(other.instances, {})) {} - - LazyNumaReplicated& operator=(const LazyNumaReplicated&) = delete; - LazyNumaReplicated& operator=(LazyNumaReplicated&& other) noexcept { - NumaReplicatedBase::operator=(*this, std::move(other)); - instances = std::exchange(other.instances, {}); - - return *this; - } - - LazyNumaReplicated& operator=(T&& source) { - prepare_replicate_from(std::move(source)); - - return *this; - } - - ~LazyNumaReplicated() override = default; - - const T& operator[](NumaReplicatedAccessToken token) const { - assert(token.get_numa_index() < instances.size()); - ensure_present(token.get_numa_index()); - return *(instances[token.get_numa_index()]); - } - - const T& operator*() const { return *(instances[0]); } - - const T* operator->() const { return instances[0].get(); } - - template - void modify_and_replicate(FuncT&& f) { - auto source = std::move(instances[0]); - std::forward(f)(*source); - prepare_replicate_from(std::move(*source)); - } - - void on_numa_config_changed() override { - // Use the first one as the source. It doesn't matter which one we use, - // because they all must be identical, but the first one is guaranteed to exist. 
- auto source = std::move(instances[0]); - prepare_replicate_from(std::move(*source)); - } - - private: - mutable std::vector> instances; - mutable std::mutex mutex; - - void ensure_present(NumaIndex idx) const { - assert(idx < instances.size()); - - if (instances[idx] != nullptr) - return; - - assert(idx != 0); - - std::unique_lock lock(mutex); - // Check again for races. - if (instances[idx] != nullptr) - return; - - const NumaConfig& cfg = get_numa_config(); - cfg.execute_on_numa_node( - idx, [this, idx]() { instances[idx] = std::make_unique(*instances[0]); }); - } - - void prepare_replicate_from(T&& source) { - instances.clear(); - - const NumaConfig& cfg = get_numa_config(); - if (cfg.requires_memory_replication()) - { - assert(cfg.num_numa_nodes() > 0); - - // We just need to make sure the first instance is there. - // Note that we cannot move here as we need to reallocate the data - // on the correct NUMA node. - cfg.execute_on_numa_node( - 0, [this, &source]() { instances.emplace_back(std::make_unique(source)); }); - - // Prepare others for lazy init. - instances.resize(cfg.num_numa_nodes()); - } - else - { - assert(cfg.num_numa_nodes() == 1); - // We take advantage of the fact that replication is not required - // and reuse the source value, avoiding one copy operation. - instances.emplace_back(std::make_unique(std::move(source))); - } - } -}; - -// Utilizes shared memory. 
-template -class LazyNumaReplicatedSystemWide: public NumaReplicatedBase { - public: - using ReplicatorFuncType = std::function; - - LazyNumaReplicatedSystemWide(NumaReplicationContext& ctx) : - NumaReplicatedBase(ctx) { - prepare_replicate_from(std::make_unique()); - } - - LazyNumaReplicatedSystemWide(NumaReplicationContext& ctx, std::unique_ptr&& source) : - NumaReplicatedBase(ctx) { - prepare_replicate_from(std::move(source)); - } - - LazyNumaReplicatedSystemWide(const LazyNumaReplicatedSystemWide&) = delete; - LazyNumaReplicatedSystemWide(LazyNumaReplicatedSystemWide&& other) noexcept : - NumaReplicatedBase(std::move(other)), - instances(std::exchange(other.instances, {})) {} - - LazyNumaReplicatedSystemWide& operator=(const LazyNumaReplicatedSystemWide&) = delete; - LazyNumaReplicatedSystemWide& operator=(LazyNumaReplicatedSystemWide&& other) noexcept { - NumaReplicatedBase::operator=(*this, std::move(other)); - instances = std::exchange(other.instances, {}); - - return *this; - } - - LazyNumaReplicatedSystemWide& operator=(std::unique_ptr&& source) { - prepare_replicate_from(std::move(source)); - - return *this; - } - - ~LazyNumaReplicatedSystemWide() override = default; - - const T& operator[](NumaReplicatedAccessToken token) const { - assert(token.get_numa_index() < instances.size()); - ensure_present(token.get_numa_index()); - return *(instances[token.get_numa_index()]); - } - - const T& operator*() const { return *(instances[0]); } - - const T* operator->() const { return &*instances[0]; } - - std::vector>> - get_status_and_errors() const { - std::vector>> - status; - status.reserve(instances.size()); - - for (const auto& instance : instances) - { - status.emplace_back(instance.get_status(), instance.get_error_message()); - } - - return status; - } - - template - void modify_and_replicate(FuncT&& f) { - auto source = std::make_unique(*instances[0]); - std::forward(f)(*source); - prepare_replicate_from(std::move(source)); - } - - void 
on_numa_config_changed() override { - // Use the first one as the source. It doesn't matter which one we use, - // because they all must be identical, but the first one is guaranteed to exist. - auto source = std::make_unique(*instances[0]); - prepare_replicate_from(std::move(source)); - } - - private: - mutable std::vector> instances; - mutable std::mutex mutex; - - std::size_t get_discriminator(NumaIndex idx) const { - const NumaConfig& cfg = get_numa_config(); - const NumaConfig& cfg_sys = NumaConfig::from_system(SystemNumaPolicy{}, false); - // as a discriminator, locate the hardware/system numadomain this cpuindex belongs to - CpuIndex cpu = *cfg.nodes[idx].begin(); // get a CpuIndex from NumaIndex - NumaIndex sys_idx = cfg_sys.is_cpu_assigned(cpu) ? cfg_sys.nodeByCpu.at(cpu) : 0; - std::string s = cfg_sys.to_string() + "$" + std::to_string(sys_idx); - return static_cast(hash_string(s)); - } - - void ensure_present(NumaIndex idx) const { - assert(idx < instances.size()); - - if (instances[idx] != nullptr) - return; - - assert(idx != 0); - - std::unique_lock lock(mutex); - // Check again for races. - if (instances[idx] != nullptr) - return; - - const NumaConfig& cfg = get_numa_config(); - cfg.execute_on_numa_node(idx, [this, idx]() { - instances[idx] = SystemWideSharedConstant(*instances[0], get_discriminator(idx)); - }); - } - - void prepare_replicate_from(std::unique_ptr&& source) { - instances.clear(); - - const NumaConfig& cfg = get_numa_config(); - // We just need to make sure the first instance is there. - // Note that we cannot move here as we need to reallocate the data - // on the correct NUMA node. - // Even in the case of a single NUMA node we have to copy since it's shared memory. - if (cfg.requires_memory_replication()) - { - assert(cfg.num_numa_nodes() > 0); - - cfg.execute_on_numa_node(0, [this, &source]() { - instances.emplace_back(SystemWideSharedConstant(*source, get_discriminator(0))); - }); - - // Prepare others for lazy init. 
- instances.resize(cfg.num_numa_nodes()); - } - else - { - assert(cfg.num_numa_nodes() == 1); - instances.emplace_back(SystemWideSharedConstant(*source, get_discriminator(0))); - } - } -}; - -class NumaReplicationContext { - public: - NumaReplicationContext(NumaConfig&& cfg) : - config(std::move(cfg)) {} - - NumaReplicationContext(const NumaReplicationContext&) = delete; - NumaReplicationContext(NumaReplicationContext&&) = delete; - - NumaReplicationContext& operator=(const NumaReplicationContext&) = delete; - NumaReplicationContext& operator=(NumaReplicationContext&&) = delete; - - ~NumaReplicationContext() { - // The context must outlive replicated objects - if (!trackedReplicatedObjects.empty()) - std::exit(EXIT_FAILURE); - } - - void attach(NumaReplicatedBase* obj) { - assert(trackedReplicatedObjects.count(obj) == 0); - trackedReplicatedObjects.insert(obj); - } - - void detach(NumaReplicatedBase* obj) { - assert(trackedReplicatedObjects.count(obj) == 1); - trackedReplicatedObjects.erase(obj); - } - - // oldObj may be invalid at this point - void move_attached([[maybe_unused]] NumaReplicatedBase* oldObj, NumaReplicatedBase* newObj) { - assert(trackedReplicatedObjects.count(oldObj) == 1); - assert(trackedReplicatedObjects.count(newObj) == 0); - trackedReplicatedObjects.erase(oldObj); - trackedReplicatedObjects.insert(newObj); - } - - void set_numa_config(NumaConfig&& cfg) { - config = std::move(cfg); - for (auto&& obj : trackedReplicatedObjects) - obj->on_numa_config_changed(); - } - - const NumaConfig& get_numa_config() const { return config; } - - private: - NumaConfig config; - - // std::set uses std::less by default, which is required for pointer comparison - std::set trackedReplicatedObjects; -}; - -inline NumaReplicatedBase::NumaReplicatedBase(NumaReplicationContext& ctx) : - context(&ctx) { - context->attach(this); -} - -inline NumaReplicatedBase::NumaReplicatedBase(NumaReplicatedBase&& other) noexcept : - context(std::exchange(other.context, nullptr)) { - 
context->move_attached(&other, this); -} - -inline NumaReplicatedBase& NumaReplicatedBase::operator=(NumaReplicatedBase&& other) noexcept { - context = std::exchange(other.context, nullptr); - - context->move_attached(&other, this); - - return *this; -} - -inline NumaReplicatedBase::~NumaReplicatedBase() { - if (context != nullptr) - context->detach(this); -} - -inline const NumaConfig& NumaReplicatedBase::get_numa_config() const { - return context->get_numa_config(); -} - -} // namespace Stockfish - - -#endif // #ifndef NUMA_H_INCLUDED diff --git a/src/perft.h b/src/perft.h deleted file mode 100644 index 24d125cbf4739681549069e0afa3447bb468b77d..0000000000000000000000000000000000000000 --- a/src/perft.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef PERFT_H_INCLUDED -#define PERFT_H_INCLUDED - -#include - -#include "movegen.h" -#include "position.h" -#include "types.h" -#include "uci.h" - -namespace Stockfish::Benchmark { - -// Utility to verify move generation. All the leaf nodes up -// to the given depth are generated and counted, and the sum is returned. 
-template -uint64_t perft(Position& pos, Depth depth) { - - StateInfo st; - - uint64_t cnt, nodes = 0; - const bool leaf = (depth == 2); - - for (const auto& m : MoveList(pos)) - { - if (Root && depth <= 1) - cnt = 1, nodes++; - else - { - pos.do_move(m, st); - cnt = leaf ? MoveList(pos).size() : perft(pos, depth - 1); - nodes += cnt; - pos.undo_move(m); - } - if (Root) - sync_cout << UCIEngine::move(m, pos.is_chess960()) << ": " << cnt << sync_endl; - } - return nodes; -} - -inline uint64_t perft(const std::string& fen, Depth depth, bool isChess960) { - StateInfo st; - Position p; - p.set(fen, isChess960, &st); - - return perft(p, depth); -} -} - -#endif // PERFT_H_INCLUDED diff --git a/src/position.cpp b/src/position.cpp deleted file mode 100644 index daadf59ecb57079dea4bf3c61ea064b9b4564ac9..0000000000000000000000000000000000000000 --- a/src/position.cpp +++ /dev/null @@ -1,1566 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "position.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bitboard.h" -#include "history.h" -#include "misc.h" -#include "movegen.h" -#include "syzygy/tbprobe.h" -#include "tt.h" -#include "uci.h" - -using std::string; - -namespace Stockfish { - -namespace Zobrist { - -Key psq[PIECE_NB][SQUARE_NB]; -Key enpassant[FILE_NB]; -Key castling[CASTLING_RIGHT_NB]; -Key side, noPawns; - -} - -namespace { - -constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); - -static constexpr Piece Pieces[] = {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, - B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING}; -} // namespace - - -// Returns an ASCII representation of the position -std::ostream& operator<<(std::ostream& os, const Position& pos) { - - os << "\n +---+---+---+---+---+---+---+---+\n"; - - for (Rank r = RANK_8;; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - os << " | " << PieceToChar[pos.piece_on(make_square(f, r))]; - - os << " | " << (1 + r) << "\n +---+---+---+---+---+---+---+---+\n"; - - if (r == RANK_1) - break; - } - - os << " a b c d e f g h\n" - << "\nFen: " << pos.fen() << "\nKey: " << std::hex << std::uppercase << std::setfill('0') - << std::setw(16) << pos.key() << std::setfill(' ') << std::dec << "\nCheckers: "; - - for (Bitboard b = pos.checkers(); b;) - os << UCIEngine::square(pop_lsb(b)) << " "; - - if (Tablebases::MaxCardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) - { - StateInfo st; - - Position p; - p.set(pos.fen(), pos.is_chess960(), &st); - Tablebases::ProbeState s1, s2; - Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1); - int dtz = Tablebases::probe_dtz(p, &s2); - os << "\nTablebases WDL: " << std::setw(4) << wdl << " (" << s1 << ")" - << "\nTablebases DTZ: " << std::setw(4) << dtz << " (" << s2 << ")"; - } - - return os; -} - - -// Implements Marcel van Kervinck's cuckoo algorithm to detect 
repetition of positions -// for 3-fold repetition draws. The algorithm uses two hash tables with Zobrist hashes -// to allow fast detection of recurring positions. For details see: -// http://web.archive.org/web/20201107002606/https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf - -// First and second hash functions for indexing the cuckoo tables -inline int H1(Key h) { return h & 0x1fff; } -inline int H2(Key h) { return (h >> 16) & 0x1fff; } - -// Cuckoo tables with Zobrist hashes of valid reversible moves, and the moves themselves -std::array cuckoo; -std::array cuckooMove; - -// Initializes at startup the various arrays used to compute hash keys -void Position::init() { - - PRNG rng(1070372); - - for (Piece pc : Pieces) - for (Square s = SQ_A1; s <= SQ_H8; ++s) - Zobrist::psq[pc][s] = rng.rand(); - // pawns on these squares will promote - std::fill_n(Zobrist::psq[W_PAWN] + SQ_A8, 8, 0); - std::fill_n(Zobrist::psq[B_PAWN], 8, 0); - - for (File f = FILE_A; f <= FILE_H; ++f) - Zobrist::enpassant[f] = rng.rand(); - - for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr) - Zobrist::castling[cr] = rng.rand(); - - Zobrist::side = rng.rand(); - Zobrist::noPawns = rng.rand(); - - // Prepare the cuckoo tables - cuckoo.fill(0); - cuckooMove.fill(Move::none()); - [[maybe_unused]] int count = 0; - for (Piece pc : Pieces) - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - for (Square s2 = Square(s1 + 1); s2 <= SQ_H8; ++s2) - if ((type_of(pc) != PAWN) && (attacks_bb(type_of(pc), s1, 0) & s2)) - { - Move move = Move(s1, s2); - Key key = Zobrist::psq[pc][s1] ^ Zobrist::psq[pc][s2] ^ Zobrist::side; - int i = H1(key); - while (true) - { - std::swap(cuckoo[i], key); - std::swap(cuckooMove[i], move); - if (move == Move::none()) // Arrived at empty slot? - break; - i = (i == H1(key)) ? H2(key) : H1(key); // Push victim to alternative slot - } - count++; - } - assert(count == 3668); -} - - -// Initializes the position object with the given FEN string. 
-// This function is not very robust - make sure that input FENs are correct, -// this is assumed to be the responsibility of the GUI. -Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si) { - /* - A FEN string defines a particular position using only the ASCII character set. - - A FEN string contains six fields separated by a space. The fields are: - - 1) Piece placement (from white's perspective). Each rank is described, starting - with rank 8 and ending with rank 1. Within each rank, the contents of each - square are described from file A through file H. Following the Standard - Algebraic Notation (SAN), each piece is identified by a single letter taken - from the standard English names. White pieces are designated using upper-case - letters ("PNBRQK") whilst Black uses lowercase ("pnbrqk"). Blank squares are - noted using digits 1 through 8 (the number of blank squares), and "/" - separates ranks. - - 2) Active color. "w" means white moves next, "b" means black. - - 3) Castling availability. If neither side can castle, this is "-". Otherwise, - this has one or more letters: "K" (White can castle kingside), "Q" (White - can castle queenside), "k" (Black can castle kingside), and/or "q" (Black - can castle queenside). - - 4) En passant target square (in algebraic notation). If there's no en passant - target square, this is "-". If a pawn has just made a 2-square move, this - is the position "behind" the pawn. Following X-FEN standard, this is recorded - only if there is a pawn in position to make an en passant capture, and if - there really is a pawn that might have advanced two squares. - - 5) Halfmove clock. This is the number of halfmoves since the last pawn advance - or capture. This is used to determine if a draw can be claimed under the - fifty-move rule. - - 6) Fullmove number. The number of the full move. It starts at 1, and is - incremented after Black's move. 
-*/ - - unsigned char col, row, token; - size_t idx; - Square sq = SQ_A8; - std::istringstream ss(fenStr); - - std::memset(reinterpret_cast(this), 0, sizeof(Position)); - std::memset(si, 0, sizeof(StateInfo)); - st = si; - - ss >> std::noskipws; - - // 1. Piece placement - while ((ss >> token) && !isspace(token)) - { - if (isdigit(token)) - sq += (token - '0') * EAST; // Advance the given number of files - - else if (token == '/') - sq += 2 * SOUTH; - - else if ((idx = PieceToChar.find(token)) != string::npos) - { - put_piece(Piece(idx), sq); - ++sq; - } - } - - // 2. Active color - ss >> token; - sideToMove = (token == 'w' ? WHITE : BLACK); - ss >> token; - - // 3. Castling availability. Compatible with 3 standards: Normal FEN standard, - // Shredder-FEN that uses the letters of the columns on which the rooks began - // the game instead of KQkq and also X-FEN standard that, in case of Chess960, - // if an inner rook is associated with the castling right, the castling tag is - // replaced by the file letter of the involved rook, as for the Shredder-FEN. - while ((ss >> token) && !isspace(token)) - { - Square rsq; - Color c = islower(token) ? BLACK : WHITE; - Piece rook = make_piece(c, ROOK); - - token = char(toupper(token)); - - if (token == 'K') - for (rsq = relative_square(c, SQ_H1); piece_on(rsq) != rook; --rsq) - {} - - else if (token == 'Q') - for (rsq = relative_square(c, SQ_A1); piece_on(rsq) != rook; ++rsq) - {} - - else if (token >= 'A' && token <= 'H') - rsq = make_square(File(token - 'A'), relative_rank(c, RANK_1)); - - else - continue; - - set_castling_right(c, rsq); - } - - // 4. En passant square. - // Ignore if square is invalid or not on side to move relative rank 6. - bool enpassant = false, legalEP = false; - - if (((ss >> col) && (col >= 'a' && col <= 'h')) - && ((ss >> row) && (row == (sideToMove == WHITE ? 
'6' : '3')))) - { - st->epSquare = make_square(File(col - 'a'), Rank(row - '1')); - - Bitboard pawns = attacks_bb(st->epSquare, ~sideToMove) & pieces(sideToMove, PAWN); - Bitboard target = (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))); - Bitboard occ = pieces() ^ target ^ st->epSquare; - - // En passant square will be considered only if - // a) side to move have a pawn threatening epSquare - // b) there is an enemy pawn in front of epSquare - // c) there is no piece on epSquare or behind epSquare - enpassant = - pawns && target && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))); - - // If no pawn can execute the en passant capture without leaving the king in check, don't record the epSquare - while (pawns) - legalEP |= !(attackers_to(square(sideToMove), occ ^ pop_lsb(pawns)) - & pieces(~sideToMove) & ~target); - } - - if (!enpassant || !legalEP) - st->epSquare = SQ_NONE; - - // 5-6. Halfmove clock and fullmove number - ss >> std::skipws >> st->rule50 >> gamePly; - - // Convert from fullmove starting from 1 to gamePly starting from 0, - // handle also common incorrect FEN with fullmove = 0. - gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK); - - chess960 = isChess960; - set_state(); - - assert(pos_is_ok()); - - return *this; -} - - -// Helper function used to set castling -// rights given the corresponding color and the rook starting square. -void Position::set_castling_right(Color c, Square rfrom) { - - Square kfrom = square(c); - CastlingRights cr = c & (kfrom < rfrom ? KING_SIDE : QUEEN_SIDE); - - st->castlingRights |= cr; - castlingRightsMask[kfrom] |= cr; - castlingRightsMask[rfrom] |= cr; - castlingRookSquare[cr] = rfrom; - - Square kto = relative_square(c, cr & KING_SIDE ? SQ_G1 : SQ_C1); - Square rto = relative_square(c, cr & KING_SIDE ? 
SQ_F1 : SQ_D1); - - castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto)) & ~(kfrom | rfrom); -} - - -// Sets king attacks to detect if a move gives check -void Position::set_check_info() const { - - update_slider_blockers(WHITE); - update_slider_blockers(BLACK); - - Square ksq = square(~sideToMove); - - st->checkSquares[PAWN] = attacks_bb(ksq, ~sideToMove); - st->checkSquares[KNIGHT] = attacks_bb(ksq); - st->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); - st->checkSquares[ROOK] = attacks_bb(ksq, pieces()); - st->checkSquares[QUEEN] = st->checkSquares[BISHOP] | st->checkSquares[ROOK]; - st->checkSquares[KING] = 0; -} - - -// Computes the hash keys of the position, and other -// data that once computed is updated incrementally as moves are made. -// The function is only used when a new position is set up -void Position::set_state() const { - - st->key = 0; - st->minorPieceKey = 0; - st->nonPawnKey[WHITE] = st->nonPawnKey[BLACK] = 0; - st->pawnKey = Zobrist::noPawns; - st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO; - st->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); - - set_check_info(); - - for (Bitboard b = pieces(); b;) - { - Square s = pop_lsb(b); - Piece pc = piece_on(s); - st->key ^= Zobrist::psq[pc][s]; - - if (type_of(pc) == PAWN) - st->pawnKey ^= Zobrist::psq[pc][s]; - - else - { - st->nonPawnKey[color_of(pc)] ^= Zobrist::psq[pc][s]; - - if (type_of(pc) != KING) - { - st->nonPawnMaterial[color_of(pc)] += PieceValue[pc]; - - if (type_of(pc) <= BISHOP) - st->minorPieceKey ^= Zobrist::psq[pc][s]; - } - } - } - - if (st->epSquare != SQ_NONE) - st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; - - if (sideToMove == BLACK) - st->key ^= Zobrist::side; - - st->key ^= Zobrist::castling[st->castlingRights]; - st->materialKey = compute_material_key(); -} - -Key Position::compute_material_key() const { - Key k = 0; - for (Piece pc : Pieces) - for (int cnt = 0; cnt < pieceCount[pc]; ++cnt) - k ^= 
Zobrist::psq[pc][8 + cnt]; - return k; -} - - -// Overload to initialize the position object with the given endgame code string -// like "KBPKN". It's mainly a helper to get the material key out of an endgame code. -Position& Position::set(const string& code, Color c, StateInfo* si) { - - assert(code[0] == 'K'); - - string sides[] = {code.substr(code.find('K', 1)), // Weak - code.substr(0, std::min(code.find('v'), code.find('K', 1)))}; // Strong - - assert(sides[0].length() > 0 && sides[0].length() < 8); - assert(sides[1].length() > 0 && sides[1].length() < 8); - - std::transform(sides[c].begin(), sides[c].end(), sides[c].begin(), tolower); - - string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/" + sides[1] - + char(8 - sides[1].length() + '0') + "/8 w - - 0 10"; - - return set(fenStr, false, si); -} - - -// Returns a FEN representation of the position. In case of -// Chess960 the Shredder-FEN notation is used. This is mainly a debugging function. -string Position::fen() const { - - int emptyCnt; - std::ostringstream ss; - - for (Rank r = RANK_8;; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - { - for (emptyCnt = 0; f <= FILE_H && empty(make_square(f, r)); ++f) - ++emptyCnt; - - if (emptyCnt) - ss << emptyCnt; - - if (f <= FILE_H) - ss << PieceToChar[piece_on(make_square(f, r))]; - } - - if (r == RANK_1) - break; - ss << '/'; - } - - ss << (sideToMove == WHITE ? " w " : " b "); - - if (can_castle(WHITE_OO)) - ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OO))) : 'K'); - - if (can_castle(WHITE_OOO)) - ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OOO))) : 'Q'); - - if (can_castle(BLACK_OO)) - ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OO))) : 'k'); - - if (can_castle(BLACK_OOO)) - ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OOO))) : 'q'); - - if (!can_castle(ANY_CASTLING)) - ss << '-'; - - ss << (ep_square() == SQ_NONE ? 
" - " : " " + UCIEngine::square(ep_square()) + " ") - << st->rule50 << " " << 1 + (gamePly - (sideToMove == BLACK)) / 2; - - return ss.str(); -} - -// Calculates st->blockersForKing[c] and st->pinners[~c], -// which store respectively the pieces preventing king of color c from being in check -// and the slider pieces of color ~c pinning pieces of color c to the king. -void Position::update_slider_blockers(Color c) const { - - Square ksq = square(c); - - st->blockersForKing[c] = 0; - st->pinners[~c] = 0; - - // Snipers are sliders that attack 's' when a piece and other snipers are removed - Bitboard snipers = ((attacks_bb(ksq) & pieces(QUEEN, ROOK)) - | (attacks_bb(ksq) & pieces(QUEEN, BISHOP))) - & pieces(~c); - Bitboard occupancy = pieces() ^ snipers; - - while (snipers) - { - Square sniperSq = pop_lsb(snipers); - Bitboard b = between_bb(ksq, sniperSq) & occupancy; - - if (b && !more_than_one(b)) - { - st->blockersForKing[c] |= b; - if (b & pieces(c)) - st->pinners[~c] |= sniperSq; - } - } -} - - -// Computes a bitboard of all pieces which attack a given square. -// Slider attacks use the occupied bitboard to indicate occupancy. 
-Bitboard Position::attackers_to(Square s, Bitboard occupied) const { - - return (attacks_bb(s, occupied) & pieces(ROOK, QUEEN)) - | (attacks_bb(s, occupied) & pieces(BISHOP, QUEEN)) - | (attacks_bb(s, BLACK) & pieces(WHITE, PAWN)) - | (attacks_bb(s, WHITE) & pieces(BLACK, PAWN)) - | (attacks_bb(s) & pieces(KNIGHT)) | (attacks_bb(s) & pieces(KING)); -} - -bool Position::attackers_to_exist(Square s, Bitboard occupied, Color c) const { - - return (attacks_bb(s, occupied) & pieces(c, ROOK, QUEEN)) - || (attacks_bb(s, occupied) & pieces(c, BISHOP, QUEEN)) - || (attacks_bb(s, ~c) & pieces(c, PAWN)) - || (attacks_bb(s) & pieces(c, KNIGHT)) || (attacks_bb(s) & pieces(c, KING)); -} - -// Tests whether a pseudo-legal move is legal -bool Position::legal(Move m) const { - - assert(m.is_ok()); - - Color us = sideToMove; - Square from = m.from_sq(); - Square to = m.to_sq(); - - assert(color_of(moved_piece(m)) == us); - assert(piece_on(square(us)) == make_piece(us, KING)); - - // En passant captures are a tricky special case. Because they are rather - // uncommon, we do it simply by testing whether the king is attacked after - // the move is made. - if (m.type_of() == EN_PASSANT) - { - Square ksq = square(us); - Square capsq = to - pawn_push(us); - Bitboard occupied = (pieces() ^ from ^ capsq) | to; - - assert(to == ep_square()); - assert(moved_piece(m) == make_piece(us, PAWN)); - assert(piece_on(capsq) == make_piece(~us, PAWN)); - assert(piece_on(to) == NO_PIECE); - - return !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, ROOK)) - && !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, BISHOP)); - } - - // Castling moves generation does not check if the castling path is clear of - // enemy attacks, it is delayed at a later time: now! - if (m.type_of() == CASTLING) - { - // After castling, the rook and king final positions are the same in - // Chess960 as they would be in standard chess. - to = relative_square(us, to > from ? SQ_G1 : SQ_C1); - Direction step = to > from ? 
WEST : EAST; - - for (Square s = to; s != from; s += step) - if (attackers_to_exist(s, pieces(), ~us)) - return false; - - // In case of Chess960, verify if the Rook blocks some checks. - // For instance an enemy queen in SQ_A1 when castling rook is in SQ_B1. - return !chess960 || !(blockers_for_king(us) & m.to_sq()); - } - - // If the moving piece is a king, check whether the destination square is - // attacked by the opponent. - if (type_of(piece_on(from)) == KING) - return !(attackers_to_exist(to, pieces() ^ from, ~us)); - - // A non-king move is legal if and only if it is not pinned or it - // is moving along the ray towards or away from the king. - return !(blockers_for_king(us) & from) || line_bb(from, to) & pieces(us, KING); -} - - -// Takes a random move and tests whether the move is -// pseudo-legal. It is used to validate moves from TT that can be corrupted -// due to SMP concurrent access or hash position key aliasing. -bool Position::pseudo_legal(const Move m) const { - - Color us = sideToMove; - Square from = m.from_sq(); - Square to = m.to_sq(); - Piece pc = moved_piece(m); - - // Use a slower but simpler function for uncommon cases - // yet we skip the legality check of MoveList(). - if (m.type_of() != NORMAL) - return checkers() ? MoveList(*this).contains(m) - : MoveList(*this).contains(m); - - // Is not a promotion, so the promotion piece must be empty - assert(m.promotion_type() - KNIGHT == NO_PIECE_TYPE); - - // If the 'from' square is not occupied by a piece belonging to the side to - // move, the move is obviously not legal. 
- if (pc == NO_PIECE || color_of(pc) != us) - return false; - - // The destination square cannot be occupied by a friendly piece - if (pieces(us) & to) - return false; - - // Handle the special case of a pawn move - if (type_of(pc) == PAWN) - { - // We have already handled promotion moves, so destination cannot be on the 8th/1st rank - if ((Rank8BB | Rank1BB) & to) - return false; - - // Check if it's a valid capture, single push, or double push - const bool isCapture = bool(attacks_bb(from, us) & pieces(~us) & to); - const bool isSinglePush = (from + pawn_push(us) == to) && empty(to); - const bool isDoublePush = (from + 2 * pawn_push(us) == to) - && (relative_rank(us, from) == RANK_2) && empty(to) - && empty(to - pawn_push(us)); - - if (!(isCapture || isSinglePush || isDoublePush)) - return false; - } - else if (!(attacks_bb(type_of(pc), from, pieces()) & to)) - return false; - - // Evasions generator already takes care to avoid some kind of illegal moves - // and legal() relies on this. We therefore have to take care that the same - // kind of moves are filtered out here. - if (checkers()) - { - if (type_of(pc) != KING) - { - // Double check? In this case, a king move is required - if (more_than_one(checkers())) - return false; - - // Our move must be a blocking interposition or a capture of the checking piece - if (!(between_bb(square(us), lsb(checkers())) & to)) - return false; - } - // In case of king moves under check we have to remove the king so as to catch - // invalid moves like b1a1 when opposite queen is on c1. - else if (attackers_to_exist(to, pieces() ^ from, ~us)) - return false; - } - - return true; -} - - -// Tests whether a pseudo-legal move gives a check -bool Position::gives_check(Move m) const { - - assert(m.is_ok()); - assert(color_of(moved_piece(m)) == sideToMove); - - Square from = m.from_sq(); - Square to = m.to_sq(); - - // Is there a direct check? 
- if (check_squares(type_of(piece_on(from))) & to) - return true; - - // Is there a discovered check? - if (blockers_for_king(~sideToMove) & from) - return !(line_bb(from, to) & pieces(~sideToMove, KING)) || m.type_of() == CASTLING; - - switch (m.type_of()) - { - case NORMAL : - return false; - - case PROMOTION : - return attacks_bb(m.promotion_type(), to, pieces() ^ from) & pieces(~sideToMove, KING); - - // En passant capture with check? We have already handled the case of direct - // checks and ordinary discovered check, so the only case we need to handle - // is the unusual case of a discovered check through the captured pawn. - case EN_PASSANT : { - Square capsq = make_square(file_of(to), rank_of(from)); - Bitboard b = (pieces() ^ from ^ capsq) | to; - - return (attacks_bb(square(~sideToMove), b) & pieces(sideToMove, QUEEN, ROOK)) - | (attacks_bb(square(~sideToMove), b) - & pieces(sideToMove, QUEEN, BISHOP)); - } - default : //CASTLING - { - // Castling is encoded as 'king captures the rook' - Square rto = relative_square(sideToMove, to > from ? SQ_F1 : SQ_D1); - - return check_squares(ROOK) & rto; - } - } -} - - -// Makes a move, and saves all information necessary -// to a StateInfo object. The move is assumed to be legal. Pseudo-legal -// moves should be filtered out before this function is called. -// If a pointer to the TT table is passed, the entry for the new position -// will be prefetched, and likewise for shared history. -void Position::do_move(Move m, - StateInfo& newSt, - bool givesCheck, - DirtyPiece& dp, - DirtyThreats& dts, - const TranspositionTable* tt = nullptr, - const SharedHistories* history = nullptr) { - - assert(m.is_ok()); - assert(&newSt != st); - - Key k = st->key ^ Zobrist::side; - - // Copy some fields of the old state to our new StateInfo object except the - // ones which are going to be recalculated from scratch anyway and then switch - // our state pointer to point to the new (ready to be updated) state. 
- std::memcpy(&newSt, st, offsetof(StateInfo, key)); - newSt.previous = st; - st = &newSt; - - // Increment ply counters. In particular, rule50 will be reset to zero later on - // in case of a capture or a pawn move. - ++gamePly; - ++st->rule50; - ++st->pliesFromNull; - - Color us = sideToMove; - Color them = ~us; - Square from = m.from_sq(); - Square to = m.to_sq(); - Piece pc = piece_on(from); - Piece captured = m.type_of() == EN_PASSANT ? make_piece(them, PAWN) : piece_on(to); - - dp.pc = pc; - dp.from = from; - dp.to = to; - dp.add_sq = SQ_NONE; - dts.us = us; - dts.prevKsq = square(us); - dts.threatenedSqs = dts.threateningSqs = 0; - - assert(color_of(pc) == us); - assert(captured == NO_PIECE || color_of(captured) == (m.type_of() != CASTLING ? them : us)); - assert(type_of(captured) != KING); - - if (m.type_of() == CASTLING) - { - assert(pc == make_piece(us, KING)); - assert(captured == make_piece(us, ROOK)); - - Square rfrom, rto; - do_castling(us, from, to, rfrom, rto, &dts, &dp); - - k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; - st->nonPawnKey[us] ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; - captured = NO_PIECE; - } - else if (captured) - { - Square capsq = to; - - // If the captured piece is a pawn, update pawn hash key, otherwise - // update non-pawn material. 
- if (type_of(captured) == PAWN) - { - if (m.type_of() == EN_PASSANT) - { - capsq -= pawn_push(us); - - assert(pc == make_piece(us, PAWN)); - assert(to == st->epSquare); - assert(relative_rank(us, to) == RANK_6); - assert(piece_on(to) == NO_PIECE); - assert(piece_on(capsq) == make_piece(them, PAWN)); - - // Update board and piece lists in ep case, normal captures are updated later - remove_piece(capsq, &dts); - } - - st->pawnKey ^= Zobrist::psq[captured][capsq]; - } - else - { - st->nonPawnMaterial[them] -= PieceValue[captured]; - st->nonPawnKey[them] ^= Zobrist::psq[captured][capsq]; - - if (type_of(captured) <= BISHOP) - st->minorPieceKey ^= Zobrist::psq[captured][capsq]; - } - - dp.remove_pc = captured; - dp.remove_sq = capsq; - - k ^= Zobrist::psq[captured][capsq]; - st->materialKey ^= - Zobrist::psq[captured][8 + pieceCount[captured] - (m.type_of() != EN_PASSANT)]; - - // Reset rule 50 counter - st->rule50 = 0; - } - else - dp.remove_sq = SQ_NONE; - - // Update hash key - k ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; - - // Reset en passant square - if (st->epSquare != SQ_NONE) - { - k ^= Zobrist::enpassant[file_of(st->epSquare)]; - st->epSquare = SQ_NONE; - } - - // Update castling rights. - k ^= Zobrist::castling[st->castlingRights]; - st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]); - k ^= Zobrist::castling[st->castlingRights]; - - // Move the piece. The tricky Chess960 castling is handled earlier - if (m.type_of() != CASTLING) - { - if (captured && m.type_of() != EN_PASSANT) - { - remove_piece(from, &dts); - swap_piece(to, pc, &dts); - } - else - move_piece(from, to, &dts); - } - - // If the moving piece is a pawn do some special extra work - if (type_of(pc) == PAWN) - { - // Check if the en passant square needs to be set. Accurate e.p. info is needed - // for correct zobrist key generation and 3-fold checking. 
- if ((int(to) ^ int(from)) == 16) - { - Square epSquare = to - pawn_push(us); - Bitboard pawns = attacks_bb(epSquare, us) & pieces(them, PAWN); - - // If there are no pawns attacking the ep square, ep is not possible. - if (pawns) - { - Square ksq = square(them); - Bitboard notBlockers = ~st->previous->blockersForKing[them]; - bool noDiscovery = (from & notBlockers) || file_of(from) == file_of(ksq); - - // If the pawn gives discovered check, ep is never legal. Else, if at least one - // pawn was not a blocker for the enemy king or lies on the same line as the - // enemy king and en passant square, a legal capture exists. - if (noDiscovery && (pawns & (notBlockers | line_bb(epSquare, ksq)))) - { - st->epSquare = epSquare; - k ^= Zobrist::enpassant[file_of(epSquare)]; - } - } - } - - else if (m.type_of() == PROMOTION) - { - Piece promotion = make_piece(us, m.promotion_type()); - PieceType promotionType = type_of(promotion); - - assert(relative_rank(us, to) == RANK_8); - assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN); - - swap_piece(to, promotion, &dts); - - dp.add_pc = promotion; - dp.add_sq = to; - dp.to = SQ_NONE; - - // Update hash keys - // Zobrist::psq[pc][to] is zero, so we don't need to clear it - k ^= Zobrist::psq[promotion][to]; - st->materialKey ^= Zobrist::psq[promotion][8 + pieceCount[promotion] - 1] - ^ Zobrist::psq[pc][8 + pieceCount[pc]]; - st->nonPawnKey[us] ^= Zobrist::psq[promotion][to]; - - if (promotionType <= BISHOP) - st->minorPieceKey ^= Zobrist::psq[promotion][to]; - - // Update material - st->nonPawnMaterial[us] += PieceValue[promotion]; - } - - // Update pawn hash key - st->pawnKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; - - // Reset rule 50 draw counter - st->rule50 = 0; - } - - else - { - st->nonPawnKey[us] ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; - - if (type_of(pc) <= BISHOP) - st->minorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; - } - - // Update the key with the final value - 
st->key = k; - if (tt) - prefetch(tt->first_entry(key())); - - if (history) - { - prefetch(&history->pawn_entry(*this)[pc][to]); - prefetch(&history->pawn_correction_entry(*this)); - prefetch(&history->minor_piece_correction_entry(*this)); - prefetch(&history->nonpawn_correction_entry(*this)); - prefetch(&history->nonpawn_correction_entry(*this)); - } - - // Set capture piece - st->capturedPiece = captured; - - // Calculate checkers bitboard (if move gives check) - st->checkersBB = givesCheck ? attackers_to(square(them)) & pieces(us) : 0; - - sideToMove = ~sideToMove; - - // Update king attacks used for fast check detection - set_check_info(); - - // Calculate the repetition info. It is the ply distance from the previous - // occurrence of the same position, negative in the 3-fold case, or zero - // if the position was not repeated. - st->repetition = 0; - int end = std::min(st->rule50, st->pliesFromNull); - if (end >= 4) - { - StateInfo* stp = st->previous->previous; - for (int i = 4; i <= end; i += 2) - { - stp = stp->previous->previous; - if (stp->key == st->key) - { - st->repetition = stp->repetition ? -i : i; - break; - } - } - } - - dts.ksq = square(us); - - assert(pos_is_ok()); - - assert(dp.pc != NO_PIECE); - assert(!(bool(captured) || m.type_of() == CASTLING) ^ (dp.remove_sq != SQ_NONE)); - assert(dp.from != SQ_NONE); - assert(!(dp.add_sq != SQ_NONE) ^ (m.type_of() == PROMOTION || m.type_of() == CASTLING)); -} - - -// Unmakes a move. When it returns, the position should -// be restored to exactly the same state as before the move was made. 
-void Position::undo_move(Move m) { - - assert(m.is_ok()); - - sideToMove = ~sideToMove; - - Color us = sideToMove; - Square from = m.from_sq(); - Square to = m.to_sq(); - Piece pc = piece_on(to); - - assert(empty(from) || m.type_of() == CASTLING); - assert(type_of(st->capturedPiece) != KING); - - if (m.type_of() == PROMOTION) - { - assert(relative_rank(us, to) == RANK_8); - assert(type_of(pc) == m.promotion_type()); - assert(type_of(pc) >= KNIGHT && type_of(pc) <= QUEEN); - - remove_piece(to); - pc = make_piece(us, PAWN); - put_piece(pc, to); - } - - if (m.type_of() == CASTLING) - { - Square rfrom, rto; - do_castling(us, from, to, rfrom, rto); - } - else - { - move_piece(to, from); // Put the piece back at the source square - - if (st->capturedPiece) - { - Square capsq = to; - - if (m.type_of() == EN_PASSANT) - { - capsq -= pawn_push(us); - - assert(type_of(pc) == PAWN); - assert(to == st->previous->epSquare); - assert(relative_rank(us, to) == RANK_6); - assert(piece_on(capsq) == NO_PIECE); - assert(st->capturedPiece == make_piece(~us, PAWN)); - } - - put_piece(st->capturedPiece, capsq); // Restore the captured piece - } - } - - // Finally point our state pointer back to the previous state - st = st->previous; - --gamePly; - - assert(pos_is_ok()); -} - -template -inline void add_dirty_threat( - DirtyThreats* const dts, Piece pc, Piece threatened, Square s, Square threatenedSq) { - if (PutPiece) - { - dts->threatenedSqs |= threatenedSq; - dts->threateningSqs |= s; - } - - dts->list.push_back({pc, threatened, s, threatenedSq, PutPiece}); -} - -#ifdef USE_AVX512ICL -// Given a DirtyThreat template and bit offsets to insert the piece type and square, write the threats -// present at the given bitboard. 
-template -void write_multiple_dirties(const Position& p, - Bitboard mask, - DirtyThreat dt_template, - DirtyThreats* dts) { - static_assert(sizeof(DirtyThreat) == 4); - - const __m512i board = _mm512_loadu_si512(p.piece_array().data()); - const __m512i AllSquares = _mm512_set_epi8( - 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, - 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, - 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - - const int dt_count = popcount(mask); - assert(dt_count <= 16); - - const __m512i template_v = _mm512_set1_epi32(dt_template.raw()); - auto* write = dts->list.make_space(dt_count); - - // Extract the list of squares and upconvert to 32 bits. There are never more than 16 - // incoming threats so this is sufficient. - __m512i threat_squares = _mm512_maskz_compress_epi8(mask, AllSquares); - threat_squares = _mm512_cvtepi8_epi32(_mm512_castsi512_si128(threat_squares)); - - __m512i threat_pieces = - _mm512_maskz_permutexvar_epi8(0x1111111111111111ULL, threat_squares, board); - - // Shift the piece and square into place - threat_squares = _mm512_slli_epi32(threat_squares, SqShift); - threat_pieces = _mm512_slli_epi32(threat_pieces, PcShift); - - const __m512i dirties = - _mm512_ternarylogic_epi32(template_v, threat_squares, threat_pieces, 254 /* A | B | C */); - _mm512_storeu_si512(write, dirties); -} -#endif - -template -void Position::update_piece_threats(Piece pc, - Square s, - DirtyThreats* const dts, - [[maybe_unused]] Bitboard noRaysContaining) const { - const Bitboard occupied = pieces(); - const Bitboard rookQueens = pieces(ROOK, QUEEN); - const Bitboard bishopQueens = pieces(BISHOP, QUEEN); - const Bitboard rAttacks = attacks_bb(s, occupied); - const Bitboard bAttacks = attacks_bb(s, occupied); - const Bitboard kings = pieces(KING); - Bitboard occupiedNoK = occupied ^ kings; - - Bitboard sliders = (rookQueens & rAttacks) | 
(bishopQueens & bAttacks); - auto process_sliders = [&](bool addDirectAttacks) { - while (sliders) - { - Square sliderSq = pop_lsb(sliders); - Piece slider = piece_on(sliderSq); - - const Bitboard ray = RayPassBB[sliderSq][s]; - const Bitboard discovered = ray & (rAttacks | bAttacks) & occupiedNoK; - - assert(!more_than_one(discovered)); - if (discovered && (RayPassBB[sliderSq][s] & noRaysContaining) != noRaysContaining) - { - const Square threatenedSq = lsb(discovered); - const Piece threatenedPc = piece_on(threatenedSq); - add_dirty_threat(dts, slider, threatenedPc, sliderSq, threatenedSq); - } - - if (addDirectAttacks) - add_dirty_threat(dts, slider, pc, sliderSq, s); - } - }; - - if (type_of(pc) == KING) - { - if constexpr (ComputeRay) - process_sliders(false); - return; - } - - - const Bitboard knights = pieces(KNIGHT); - const Bitboard whitePawns = pieces(WHITE, PAWN); - const Bitboard blackPawns = pieces(BLACK, PAWN); - - - Bitboard threatened = attacks_bb(pc, s, occupied) & occupiedNoK; - Bitboard incoming_threats = - (PseudoAttacks[KNIGHT][s] & knights) | (attacks_bb(s, WHITE) & blackPawns) - | (attacks_bb(s, BLACK) & whitePawns) | (PseudoAttacks[KING][s] & kings); - -#ifdef USE_AVX512ICL - if constexpr (PutPiece) - { - dts->threatenedSqs |= threatened; - // A bit may only be set if that square actually produces a threat, so we - // must guard setting the square accordingly - dts->threateningSqs |= Bitboard(bool(threatened)) << s; - } - - DirtyThreat dt_template{pc, NO_PIECE, s, Square(0), PutPiece}; - write_multiple_dirties( - *this, threatened, dt_template, dts); - - Bitboard all_attackers = sliders | incoming_threats; - - if constexpr (PutPiece) - { - dts->threatenedSqs |= Bitboard(bool(all_attackers)) << s; // same as above - dts->threateningSqs |= all_attackers; - } - - dt_template = {NO_PIECE, pc, Square(0), s, PutPiece}; - write_multiple_dirties(*this, all_attackers, - dt_template, dts); -#else - while (threatened) - { - Square threatenedSq = 
pop_lsb(threatened); - Piece threatenedPc = piece_on(threatenedSq); - - assert(threatenedSq != s); - assert(threatenedPc); - - add_dirty_threat(dts, pc, threatenedPc, s, threatenedSq); - } -#endif - - if constexpr (ComputeRay) - { -#ifndef USE_AVX512ICL - process_sliders(true); -#else // for ICL, direct threats were processed earlier (all_attackers) - process_sliders(false); -#endif - } - else - { - incoming_threats |= sliders; - } - -#ifndef USE_AVX512ICL - while (incoming_threats) - { - Square srcSq = pop_lsb(incoming_threats); - Piece srcPc = piece_on(srcSq); - - assert(srcSq != s); - assert(srcPc != NO_PIECE); - - add_dirty_threat(dts, srcPc, pc, srcSq, s); - } -#endif -} - -// Helper used to do/undo a castling move. This is a bit -// tricky in Chess960 where from/to squares can overlap. -template -void Position::do_castling(Color us, - Square from, - Square& to, - Square& rfrom, - Square& rto, - DirtyThreats* const dts, - DirtyPiece* const dp) { - - bool kingSide = to > from; - rfrom = to; // Castling is encoded as "king captures friendly rook" - rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); - to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); - - assert(!Do || dp); - - if (Do) - { - dp->to = to; - dp->remove_pc = dp->add_pc = make_piece(us, ROOK); - dp->remove_sq = rfrom; - dp->add_sq = rto; - } - - // Remove both pieces first since squares could overlap in Chess960 - remove_piece(Do ? from : to, dts); - remove_piece(Do ? rfrom : rto, dts); - put_piece(make_piece(us, KING), Do ? to : from, dts); - put_piece(make_piece(us, ROOK), Do ? rto : rfrom, dts); -} - - -// Used to do a "null move": it flips -// the side to move without executing any move on the board. 
-void Position::do_null_move(StateInfo& newSt) { - - assert(!checkers()); - assert(&newSt != st); - - std::memcpy(&newSt, st, sizeof(StateInfo)); - - newSt.previous = st; - st = &newSt; - - if (st->epSquare != SQ_NONE) - { - st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; - st->epSquare = SQ_NONE; - } - - st->key ^= Zobrist::side; - - st->pliesFromNull = 0; - - sideToMove = ~sideToMove; - - set_check_info(); - - st->repetition = 0; - - assert(pos_is_ok()); -} - - -// Must be used to undo a "null move" -void Position::undo_null_move() { - - assert(!checkers()); - - st = st->previous; - sideToMove = ~sideToMove; -} - - -// Tests if the SEE (Static Exchange Evaluation) -// value of move is greater or equal to the given threshold. We'll use an -// algorithm similar to alpha-beta pruning with a null window. -bool Position::see_ge(Move m, int threshold) const { - - assert(m.is_ok()); - - // Only deal with normal moves, assume others pass a simple SEE - if (m.type_of() != NORMAL) - return VALUE_ZERO >= threshold; - - Square from = m.from_sq(), to = m.to_sq(); - - assert(piece_on(from) != NO_PIECE); - - int swap = PieceValue[piece_on(to)] - threshold; - if (swap < 0) - return false; - - swap = PieceValue[piece_on(from)] - swap; - if (swap <= 0) - return true; - - assert(color_of(piece_on(from)) == sideToMove); - Bitboard occupied = pieces() ^ from ^ to; // xoring to is important for pinned piece logic - Color stm = sideToMove; - Bitboard attackers = attackers_to(to, occupied); - Bitboard stmAttackers, bb; - int res = 1; - - while (true) - { - stm = ~stm; - attackers &= occupied; - - // If stm has no more attackers then give up: stm loses - if (!(stmAttackers = attackers & pieces(stm))) - break; - - // Don't allow pinned pieces to attack as long as there are - // pinners on their original square. 
- if (pinners(~stm) & occupied) - { - stmAttackers &= ~blockers_for_king(stm); - - if (!stmAttackers) - break; - } - - res ^= 1; - - // Locate and remove the next least valuable attacker, and add to - // the bitboard 'attackers' any X-ray attackers behind it. - if ((bb = stmAttackers & pieces(PAWN))) - { - if ((swap = PawnValue - swap) < res) - break; - occupied ^= least_significant_square_bb(bb); - - attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); - } - - else if ((bb = stmAttackers & pieces(KNIGHT))) - { - if ((swap = KnightValue - swap) < res) - break; - occupied ^= least_significant_square_bb(bb); - } - - else if ((bb = stmAttackers & pieces(BISHOP))) - { - if ((swap = BishopValue - swap) < res) - break; - occupied ^= least_significant_square_bb(bb); - - attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); - } - - else if ((bb = stmAttackers & pieces(ROOK))) - { - if ((swap = RookValue - swap) < res) - break; - occupied ^= least_significant_square_bb(bb); - - attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN); - } - - else if ((bb = stmAttackers & pieces(QUEEN))) - { - swap = QueenValue - swap; - // implies that the previous recapture was done by a higher rated piece than a Queen (King is excluded) - assert(swap >= res); - occupied ^= least_significant_square_bb(bb); - - attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN)) - | (attacks_bb(to, occupied) & pieces(ROOK, QUEEN)); - } - - else // KING - // If we "capture" with the king but the opponent still has attackers, - // reverse the result. - return (attackers & ~pieces(stm)) ? res ^ 1 : res; - } - - return bool(res); -} - -// Tests whether the position is drawn by 50-move rule -// or by repetition. It does not detect stalemates. 
-bool Position::is_draw(int ply) const { - - if (st->rule50 > 99 && (!checkers() || MoveList(*this).size())) - return true; - - return is_repetition(ply); -} - -// Return a draw score if a position repeats once earlier but strictly -// after the root, or repeats twice before or at the root. -bool Position::is_repetition(int ply) const { return st->repetition && st->repetition < ply; } - -// Tests whether there has been at least one repetition -// of positions since the last capture or pawn move. -bool Position::has_repeated() const { - - StateInfo* stc = st; - int end = std::min(st->rule50, st->pliesFromNull); - while (end-- >= 4) - { - if (stc->repetition) - return true; - - stc = stc->previous; - } - return false; -} - - -// Tests if the position has a move which draws by repetition. -// This function accurately matches the outcome of is_draw() over all legal moves. -bool Position::upcoming_repetition(int ply) const { - - int j; - - int end = std::min(st->rule50, st->pliesFromNull); - - if (end < 3) - return false; - - Key originalKey = st->key; - StateInfo* stp = st->previous; - Key other = originalKey ^ stp->key ^ Zobrist::side; - - for (int i = 3; i <= end; i += 2) - { - stp = stp->previous; - other ^= stp->key ^ stp->previous->key ^ Zobrist::side; - stp = stp->previous; - - if (other != 0) - continue; - - Key moveKey = originalKey ^ stp->key; - if ((j = H1(moveKey), cuckoo[j] == moveKey) || (j = H2(moveKey), cuckoo[j] == moveKey)) - { - Move move = cuckooMove[j]; - Square s1 = move.from_sq(); - Square s2 = move.to_sq(); - - if (!((between_bb(s1, s2) ^ s2) & pieces())) - { - if (ply > i) - return true; - - // For nodes before or at the root, check that the move is a - // repetition rather than a move to the current position. - if (stp->repetition) - return true; - } - } - } - return false; -} - - -// Flips position with the white and black sides reversed. This -// is only useful for debugging e.g. for finding evaluation symmetry bugs. 
-void Position::flip() { - - string f, token; - std::stringstream ss(fen()); - - for (Rank r = RANK_8;; --r) // Piece placement - { - std::getline(ss, token, r > RANK_1 ? '/' : ' '); - f.insert(0, token + (f.empty() ? " " : "/")); - - if (r == RANK_1) - break; - } - - ss >> token; // Active color - f += (token == "w" ? "B " : "W "); // Will be lowercased later - - ss >> token; // Castling availability - f += token + " "; - - std::transform(f.begin(), f.end(), f.begin(), - [](char c) { return char(islower(c) ? toupper(c) : tolower(c)); }); - - ss >> token; // En passant square - f += (token == "-" ? token : token.replace(1, 1, token[1] == '3' ? "6" : "3")); - - std::getline(ss, token); // Half and full moves - f += token; - - set(f, is_chess960(), st); - - assert(pos_is_ok()); -} - - -bool Position::material_key_is_ok() const { return compute_material_key() == st->materialKey; } - - -// Performs some consistency checks for the position object -// and raise an assert if something wrong is detected. -// This is meant to be helpful when debugging. -bool Position::pos_is_ok() const { - - constexpr bool Fast = true; // Quick (default) or full check? 
- - if ((sideToMove != WHITE && sideToMove != BLACK) || piece_on(square(WHITE)) != W_KING - || piece_on(square(BLACK)) != B_KING - || (ep_square() != SQ_NONE && relative_rank(sideToMove, ep_square()) != RANK_6)) - assert(0 && "pos_is_ok: Default"); - - if (Fast) - return true; - - if (pieceCount[W_KING] != 1 || pieceCount[B_KING] != 1 - || attackers_to_exist(square(~sideToMove), pieces(), sideToMove)) - assert(0 && "pos_is_ok: Kings"); - - if ((pieces(PAWN) & (Rank1BB | Rank8BB)) || pieceCount[W_PAWN] > 8 || pieceCount[B_PAWN] > 8) - assert(0 && "pos_is_ok: Pawns"); - - - if (ep_square() != SQ_NONE) - { - Square ksq = square(sideToMove); - - Bitboard captured = (ep_square() + pawn_push(~sideToMove)) & pieces(~sideToMove, PAWN); - Bitboard pawns = attacks_bb(ep_square(), ~sideToMove) & pieces(sideToMove, PAWN); - Bitboard potentialCheckers = pieces(~sideToMove) ^ captured; - - if (!captured || !pawns - || ((attackers_to(ksq, pieces() ^ captured ^ ep_square() ^ lsb(pawns)) - & potentialCheckers) - && (attackers_to(ksq, pieces() ^ captured ^ ep_square() ^ msb(pawns)) - & potentialCheckers))) - assert(0 && "pos_is_ok: En passant square"); - } - - if ((pieces(WHITE) & pieces(BLACK)) || (pieces(WHITE) | pieces(BLACK)) != pieces() - || popcount(pieces(WHITE)) > 16 || popcount(pieces(BLACK)) > 16) - assert(0 && "pos_is_ok: Bitboards"); - - for (PieceType p1 = PAWN; p1 <= KING; ++p1) - for (PieceType p2 = PAWN; p2 <= KING; ++p2) - if (p1 != p2 && (pieces(p1) & pieces(p2))) - assert(0 && "pos_is_ok: Bitboards"); - - - for (Piece pc : Pieces) - if (pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc))) - || pieceCount[pc] != std::count(board.begin(), board.end(), pc)) - assert(0 && "pos_is_ok: Pieces"); - - for (Color c : {WHITE, BLACK}) - for (CastlingRights cr : {c & KING_SIDE, c & QUEEN_SIDE}) - { - if (!can_castle(cr)) - continue; - - if (piece_on(castlingRookSquare[cr]) != make_piece(c, ROOK) - || castlingRightsMask[castlingRookSquare[cr]] != cr - || 
(castlingRightsMask[square(c)] & cr) != cr) - assert(0 && "pos_is_ok: Castling"); - } - - assert(material_key_is_ok() && "pos_is_ok: materialKey"); - - return true; -} - -} // namespace Stockfish diff --git a/src/position.h b/src/position.h deleted file mode 100644 index e02a400d32f6554bcd21417457f8d8ae9fe0aa5d..0000000000000000000000000000000000000000 --- a/src/position.h +++ /dev/null @@ -1,414 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef POSITION_H_INCLUDED -#define POSITION_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include - -#include "bitboard.h" -#include "types.h" - -namespace Stockfish { - -class TranspositionTable; -struct SharedHistories; - -// StateInfo struct stores information needed to restore a Position object to -// its previous state when we retract a move. Whenever a move is made on the -// board (by calling Position::do_move), a StateInfo object must be passed. 
- -struct StateInfo { - - // Copied when making a move - Key materialKey; - Key pawnKey; - Key minorPieceKey; - Key nonPawnKey[COLOR_NB]; - Value nonPawnMaterial[COLOR_NB]; - int castlingRights; - int rule50; - int pliesFromNull; - Square epSquare; - - // Not copied when making a move (will be recomputed anyhow) - Key key; - Bitboard checkersBB; - StateInfo* previous; - Bitboard blockersForKing[COLOR_NB]; - Bitboard pinners[COLOR_NB]; - Bitboard checkSquares[PIECE_TYPE_NB]; - Piece capturedPiece; - int repetition; -}; - - -// A list to keep track of the position states along the setup moves (from the -// start position to the position just before the search starts). Needed by -// 'draw by repetition' detection. Use a std::deque because pointers to -// elements are not invalidated upon list resizing. -using StateListPtr = std::unique_ptr>; - -// Position class stores information regarding the board representation as -// pieces, side to move, hash keys, castling info, etc. Important methods are -// do_move() and undo_move(), used by the search to update node info when -// traversing the search tree. -class Position { - public: - static void init(); - - Position() = default; - Position(const Position&) = delete; - Position& operator=(const Position&) = delete; - - // FEN string input/output - Position& set(const std::string& fenStr, bool isChess960, StateInfo* si); - Position& set(const std::string& code, Color c, StateInfo* si); - std::string fen() const; - - // Position representation - Bitboard pieces() const; // All pieces - template - Bitboard pieces(PieceTypes... pts) const; - Bitboard pieces(Color c) const; - template - Bitboard pieces(Color c, PieceTypes... 
pts) const; - Piece piece_on(Square s) const; - const std::array& piece_array() const; - Square ep_square() const; - bool empty(Square s) const; - template - int count(Color c) const; - template - int count() const; - template - Square square(Color c) const; - - // Castling - bool can_castle(CastlingRights cr) const; - bool castling_impeded(CastlingRights cr) const; - Square castling_rook_square(CastlingRights cr) const; - - // Checking - Bitboard checkers() const; - Bitboard blockers_for_king(Color c) const; - Bitboard check_squares(PieceType pt) const; - Bitboard pinners(Color c) const; - - // Attacks to/from a given square - Bitboard attackers_to(Square s) const; - Bitboard attackers_to(Square s, Bitboard occupied) const; - bool attackers_to_exist(Square s, Bitboard occupied, Color c) const; - void update_slider_blockers(Color c) const; - template - Bitboard attacks_by(Color c) const; - - // Properties of moves - bool legal(Move m) const; - bool pseudo_legal(const Move m) const; - bool capture(Move m) const; - bool capture_stage(Move m) const; - bool gives_check(Move m) const; - Piece moved_piece(Move m) const; - Piece captured_piece() const; - - // Doing and undoing moves - void do_move(Move m, StateInfo& newSt, const TranspositionTable* tt); - void do_move(Move m, - StateInfo& newSt, - bool givesCheck, - DirtyPiece& dp, - DirtyThreats& dts, - const TranspositionTable* tt, - const SharedHistories* worker); - void undo_move(Move m); - void do_null_move(StateInfo& newSt); - void undo_null_move(); - - // Static Exchange Evaluation - bool see_ge(Move m, int threshold = 0) const; - - // Accessing hash keys - Key key() const; - Key material_key() const; - Key pawn_key() const; - Key minor_piece_key() const; - Key non_pawn_key(Color c) const; - - // Other properties of the position - Color side_to_move() const; - int game_ply() const; - bool is_chess960() const; - bool is_draw(int ply) const; - bool is_repetition(int ply) const; - bool upcoming_repetition(int ply) 
const; - bool has_repeated() const; - int rule50_count() const; - Value non_pawn_material(Color c) const; - Value non_pawn_material() const; - - // Position consistency check, for debugging - bool pos_is_ok() const; - bool material_key_is_ok() const; - void flip(); - - StateInfo* state() const; - - void put_piece(Piece pc, Square s, DirtyThreats* const dts = nullptr); - void remove_piece(Square s, DirtyThreats* const dts = nullptr); - void swap_piece(Square s, Piece pc, DirtyThreats* const dts = nullptr); - - private: - // Initialization helpers (used while setting up a position) - void set_castling_right(Color c, Square rfrom); - Key compute_material_key() const; - void set_state() const; - void set_check_info() const; - - // Other helpers - template - void update_piece_threats(Piece pc, - Square s, - DirtyThreats* const dts, - Bitboard noRaysContaining = -1ULL) const; - void move_piece(Square from, Square to, DirtyThreats* const dts = nullptr); - template - void do_castling(Color us, - Square from, - Square& to, - Square& rfrom, - Square& rto, - DirtyThreats* const dts = nullptr, - DirtyPiece* const dp = nullptr); - Key adjust_key50(Key k) const; - - // Data members - std::array board; - std::array byTypeBB; - std::array byColorBB; - - int pieceCount[PIECE_NB]; - int castlingRightsMask[SQUARE_NB]; - Square castlingRookSquare[CASTLING_RIGHT_NB]; - Bitboard castlingPath[CASTLING_RIGHT_NB]; - StateInfo* st; - int gamePly; - Color sideToMove; - bool chess960; - DirtyPiece scratch_dp; - DirtyThreats scratch_dts; -}; - -std::ostream& operator<<(std::ostream& os, const Position& pos); - -inline Color Position::side_to_move() const { return sideToMove; } - -inline Piece Position::piece_on(Square s) const { - assert(is_ok(s)); - return board[s]; -} - -inline const std::array& Position::piece_array() const { return board; } - -inline bool Position::empty(Square s) const { return piece_on(s) == NO_PIECE; } - -inline Piece Position::moved_piece(Move m) const { return 
piece_on(m.from_sq()); } - -inline Bitboard Position::pieces() const { return byTypeBB[ALL_PIECES]; } - -template -inline Bitboard Position::pieces(PieceTypes... pts) const { - return (byTypeBB[pts] | ...); -} - -inline Bitboard Position::pieces(Color c) const { return byColorBB[c]; } - -template -inline Bitboard Position::pieces(Color c, PieceTypes... pts) const { - return pieces(c) & pieces(pts...); -} - -template -inline int Position::count(Color c) const { - return pieceCount[make_piece(c, Pt)]; -} - -template -inline int Position::count() const { - return count(WHITE) + count(BLACK); -} - -template -inline Square Position::square(Color c) const { - assert(count(c) == 1); - return lsb(pieces(c, Pt)); -} - -inline Square Position::ep_square() const { return st->epSquare; } - -inline bool Position::can_castle(CastlingRights cr) const { return st->castlingRights & cr; } - -inline bool Position::castling_impeded(CastlingRights cr) const { - assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); - return pieces() & castlingPath[cr]; -} - -inline Square Position::castling_rook_square(CastlingRights cr) const { - assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); - return castlingRookSquare[cr]; -} - -inline Bitboard Position::attackers_to(Square s) const { return attackers_to(s, pieces()); } - -template -inline Bitboard Position::attacks_by(Color c) const { - - if constexpr (Pt == PAWN) - return c == WHITE ? 
pawn_attacks_bb(pieces(WHITE, PAWN)) - : pawn_attacks_bb(pieces(BLACK, PAWN)); - else - { - Bitboard threats = 0; - Bitboard attackers = pieces(c, Pt); - while (attackers) - threats |= attacks_bb(pop_lsb(attackers), pieces()); - return threats; - } -} - -inline Bitboard Position::checkers() const { return st->checkersBB; } - -inline Bitboard Position::blockers_for_king(Color c) const { return st->blockersForKing[c]; } - -inline Bitboard Position::pinners(Color c) const { return st->pinners[c]; } - -inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; } - -inline Key Position::key() const { return adjust_key50(st->key); } - -inline Key Position::adjust_key50(Key k) const { - return st->rule50 < 14 ? k : k ^ make_key((st->rule50 - 14) / 8); -} - -inline Key Position::pawn_key() const { return st->pawnKey; } - -inline Key Position::material_key() const { return st->materialKey; } - -inline Key Position::minor_piece_key() const { return st->minorPieceKey; } - -inline Key Position::non_pawn_key(Color c) const { return st->nonPawnKey[c]; } - -inline Value Position::non_pawn_material(Color c) const { return st->nonPawnMaterial[c]; } - -inline Value Position::non_pawn_material() const { - return non_pawn_material(WHITE) + non_pawn_material(BLACK); -} - -inline int Position::game_ply() const { return gamePly; } - -inline int Position::rule50_count() const { return st->rule50; } - -inline bool Position::is_chess960() const { return chess960; } - -inline bool Position::capture(Move m) const { - assert(m.is_ok()); - return (!empty(m.to_sq()) && m.type_of() != CASTLING) || m.type_of() == EN_PASSANT; -} - -// Returns true if a move is generated from the capture stage, having also -// queen promotions covered, i.e. consistency with the capture stage move -// generation is needed to avoid the generation of duplicate moves. 
-inline bool Position::capture_stage(Move m) const { - assert(m.is_ok()); - return capture(m) || m.promotion_type() == QUEEN; -} - -inline Piece Position::captured_piece() const { return st->capturedPiece; } - -inline void Position::put_piece(Piece pc, Square s, DirtyThreats* const dts) { - board[s] = pc; - byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s; - byColorBB[color_of(pc)] |= s; - pieceCount[pc]++; - pieceCount[make_piece(color_of(pc), ALL_PIECES)]++; - - if (dts) - update_piece_threats(pc, s, dts); -} - -inline void Position::remove_piece(Square s, DirtyThreats* const dts) { - Piece pc = board[s]; - - if (dts) - update_piece_threats(pc, s, dts); - - byTypeBB[ALL_PIECES] ^= s; - byTypeBB[type_of(pc)] ^= s; - byColorBB[color_of(pc)] ^= s; - board[s] = NO_PIECE; - pieceCount[pc]--; - pieceCount[make_piece(color_of(pc), ALL_PIECES)]--; -} - -inline void Position::move_piece(Square from, Square to, DirtyThreats* const dts) { - Piece pc = board[from]; - Bitboard fromTo = from | to; - - if (dts) - update_piece_threats(pc, from, dts, fromTo); - - byTypeBB[ALL_PIECES] ^= fromTo; - byTypeBB[type_of(pc)] ^= fromTo; - byColorBB[color_of(pc)] ^= fromTo; - board[from] = NO_PIECE; - board[to] = pc; - - if (dts) - update_piece_threats(pc, to, dts, fromTo); -} - -inline void Position::swap_piece(Square s, Piece pc, DirtyThreats* const dts) { - Piece old = board[s]; - - remove_piece(s); - - if (dts) - update_piece_threats(old, s, dts); - - put_piece(pc, s); - - if (dts) - update_piece_threats(pc, s, dts); -} - -inline void Position::do_move(Move m, StateInfo& newSt, const TranspositionTable* tt = nullptr) { - new (&scratch_dts) DirtyThreats; - do_move(m, newSt, gives_check(m), scratch_dp, scratch_dts, tt, nullptr); -} - -inline StateInfo* Position::state() const { return st; } - -} // namespace Stockfish - -#endif // #ifndef POSITION_H_INCLUDED diff --git a/src/score.cpp b/src/score.cpp deleted file mode 100644 index 
ea62577b9f48fae1e6900ec21239e44470ff32df..0000000000000000000000000000000000000000 --- a/src/score.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "score.h" - -#include -#include -#include - -#include "uci.h" - -namespace Stockfish { - -Score::Score(Value v, const Position& pos) { - assert(-VALUE_INFINITE < v && v < VALUE_INFINITE); - - if (!is_decisive(v)) - { - score = InternalUnits{UCIEngine::to_cp(v, pos)}; - } - else if (std::abs(v) <= VALUE_TB) - { - auto distance = VALUE_TB - std::abs(v); - score = (v > 0) ? Tablebase{distance, true} : Tablebase{-distance, false}; - } - else - { - auto distance = VALUE_MATE - std::abs(v); - score = (v > 0) ? 
Mate{distance} : Mate{-distance}; - } -} - -} \ No newline at end of file diff --git a/src/score.h b/src/score.h deleted file mode 100644 index cf89d3cdd54ca9bd4c0b54f96bce2eb204e92506..0000000000000000000000000000000000000000 --- a/src/score.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef SCORE_H_INCLUDED -#define SCORE_H_INCLUDED - -#include -#include - -#include "types.h" - -namespace Stockfish { - -class Position; - -class Score { - public: - struct Mate { - int plies; - }; - - struct Tablebase { - int plies; - bool win; - }; - - struct InternalUnits { - int value; - }; - - Score() = default; - Score(Value v, const Position& pos); - - template - bool is() const { - return std::holds_alternative(score); - } - - template - T get() const { - return std::get(score); - } - - template - decltype(auto) visit(F&& f) const { - return std::visit(std::forward(f), score); - } - - private: - std::variant score; -}; - -} - -#endif // #ifndef SCORE_H_INCLUDED diff --git a/src/search.cpp b/src/search.cpp deleted file mode 100644 index 028f61cce2b07be0ae6f6bcf34217ed81f83f873..0000000000000000000000000000000000000000 --- a/src/search.cpp +++ /dev/null @@ -1,2217 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "search.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bitboard.h" -#include "evaluate.h" -#include "history.h" -#include "misc.h" -#include "movegen.h" -#include "movepick.h" -#include "nnue/network.h" -#include "nnue/nnue_accumulator.h" -#include "position.h" -#include "syzygy/tbprobe.h" -#include "thread.h" -#include "timeman.h" -#include "tt.h" -#include "types.h" -#include "uci.h" -#include "ucioption.h" - -namespace Stockfish { - -namespace TB = Tablebases; - -void syzygy_extend_pv(const OptionsMap& options, - const Search::LimitsType& limits, - Stockfish::Position& pos, - Stockfish::Search::RootMove& rootMove, - Value& v); - -using namespace Search; - -namespace { - -constexpr int SEARCHEDLIST_CAPACITY = 32; -using SearchedList = ValueList; - -// (*Scalers): -// The values with Scaler asterisks have proven non-linear scaling. -// They are optimized to time controls of 180 + 1.8 and longer, -// so changing them or adding conditions that are similar requires -// tests at these types of time controls. - -// (*Scaler) All tuned parameters at time controls shorter than -// optimized for require verifications at longer time controls - -int correction_value(const Worker& w, const Position& pos, const Stack* const ss) { - const Color us = pos.side_to_move(); - const auto m = (ss - 1)->currentMove; - const auto& shared = w.sharedHistory; - const int pcv = shared.pawn_correction_entry(pos).at(us).pawn; - const int micv = shared.minor_piece_correction_entry(pos).at(us).minor; - const int wnpcv = shared.nonpawn_correction_entry(pos).at(us).nonPawnWhite; - const int bnpcv = shared.nonpawn_correction_entry(pos).at(us).nonPawnBlack; - const int cntcv = - m.is_ok() ? 
(*(ss - 2)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()] - + (*(ss - 4)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()] - : 8; - - return 11433 * pcv + 8823 * micv + 12749 * (wnpcv + bnpcv) + 8022 * cntcv; -} - -// Add correctionHistory value to raw staticEval and guarantee evaluation -// does not hit the tablebase range. -Value to_corrected_static_eval(const Value v, const int cv) { - return std::clamp(v + cv / 131072, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); -} - -void update_correction_history(const Position& pos, - Stack* const ss, - Search::Worker& workerThread, - const int bonus) { - const Move m = (ss - 1)->currentMove; - const Color us = pos.side_to_move(); - - constexpr int nonPawnWeight = 181; - auto& shared = workerThread.sharedHistory; - - shared.pawn_correction_entry(pos).at(us).pawn << bonus; - shared.minor_piece_correction_entry(pos).at(us).minor << bonus * 155 / 128; - shared.nonpawn_correction_entry(pos).at(us).nonPawnWhite << bonus * nonPawnWeight / 128; - shared.nonpawn_correction_entry(pos).at(us).nonPawnBlack << bonus * nonPawnWeight / 128; - - // Branchless: use mask to zero bonus when move is not ok - const int mask = int(m.is_ok()); - const Square to = m.to_sq_unchecked(); - const Piece pc = pos.piece_on(to); - const int bonus2 = (bonus * 129 / 128) * mask; - const int bonus4 = (bonus * 61 / 128) * mask; - (*(ss - 2)->continuationCorrectionHistory)[pc][to] << bonus2; - (*(ss - 4)->continuationCorrectionHistory)[pc][to] << bonus4; -} - -// Add a small random component to draw evaluations to avoid 3-fold blindness -Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); } -Value value_to_tt(Value v, int ply); -Value value_from_tt(Value v, int ply, int r50c); -void update_pv(Move* pv, Move move, const Move* childPv); -void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); -void update_quiet_histories( - const Position& pos, Stack* ss, 
Search::Worker& workerThread, Move move, int bonus); -void update_all_stats(const Position& pos, - Stack* ss, - Search::Worker& workerThread, - Move bestMove, - Square prevSq, - SearchedList& quietsSearched, - SearchedList& capturesSearched, - Depth depth, - Move ttMove); - -bool is_shuffling(Move move, Stack* const ss, const Position& pos) { - if (pos.capture_stage(move) || pos.rule50_count() < 11) - return false; - if (pos.state()->pliesFromNull <= 6 || ss->ply < 19) - return false; - return move.from_sq() == (ss - 2)->currentMove.to_sq() - && (ss - 2)->currentMove.from_sq() == (ss - 4)->currentMove.to_sq(); -} - -} // namespace - -Search::Worker::Worker(SharedState& sharedState, - std::unique_ptr sm, - size_t threadId, - size_t numaThreadId, - size_t numaTotalThreads, - NumaReplicatedAccessToken token) : - // Unpack the SharedState struct into member variables - sharedHistory(sharedState.sharedHistories.at(token.get_numa_index())), - threadIdx(threadId), - numaThreadIdx(numaThreadId), - numaTotal(numaTotalThreads), - numaAccessToken(token), - manager(std::move(sm)), - options(sharedState.options), - threads(sharedState.threads), - tt(sharedState.tt), - networks(sharedState.networks), - refreshTable(networks[token]) { - clear(); -} - -void Search::Worker::ensure_network_replicated() { - // Access once to force lazy initialization. - // We do this because we want to avoid initialization during search. - (void) (networks[numaAccessToken]); -} - -void Search::Worker::start_searching() { - - accumulatorStack.reset(); - - // Non-main threads go directly to iterative_deepening() - if (!is_mainthread()) - { - iterative_deepening(); - return; - } - - main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options, - main_manager()->originalTimeAdjust); - tt.new_search(); - - if (rootMoves.empty()) - { - rootMoves.emplace_back(Move::none()); - main_manager()->updates.onUpdateNoMoves( - {0, {rootPos.checkers() ? 
-VALUE_MATE : VALUE_DRAW, rootPos}}); - } - else - { - threads.start_searching(); // start non-main threads - iterative_deepening(); // main thread start searching - } - - // When we reach the maximum depth, we can arrive here without a raise of - // threads.stop. However, if we are pondering or in an infinite search, - // the UCI protocol states that we shouldn't print the best move before the - // GUI sends a "stop" or "ponderhit" command. We therefore simply wait here - // until the GUI sends one of those commands. - while (!threads.stop && (main_manager()->ponder || limits.infinite)) - {} // Busy wait for a stop or a ponder reset - - // Stop the threads if not already stopped (also raise the stop if - // "ponderhit" just reset threads.ponder) - threads.stop = true; - - // Wait until all threads have finished - threads.wait_for_search_finished(); - - // When playing in 'nodes as time' mode, subtract the searched nodes from - // the available ones before exiting. - if (limits.npmsec) - main_manager()->tm.advance_nodes_time(threads.nodes_searched() - - limits.inc[rootPos.side_to_move()]); - - Worker* bestThread = this; - Skill skill = - Skill(options["Skill Level"], options["UCI_LimitStrength"] ? 
int(options["UCI_Elo"]) : 0); - - if (int(options["MultiPV"]) == 1 && !limits.depth && !limits.mate && !skill.enabled() - && rootMoves[0].pv[0] != Move::none()) - bestThread = threads.get_best_thread()->worker.get(); - - main_manager()->bestPreviousScore = bestThread->rootMoves[0].score; - main_manager()->bestPreviousAverageScore = bestThread->rootMoves[0].averageScore; - - // Send again PV info if we have a new best thread - if (bestThread != this) - main_manager()->pv(*bestThread, threads, tt, bestThread->completedDepth); - - std::string ponder; - - if (bestThread->rootMoves[0].pv.size() > 1 - || bestThread->rootMoves[0].extract_ponder_from_tt(tt, rootPos)) - ponder = UCIEngine::move(bestThread->rootMoves[0].pv[1], rootPos.is_chess960()); - - auto bestmove = UCIEngine::move(bestThread->rootMoves[0].pv[0], rootPos.is_chess960()); - main_manager()->updates.onBestmove(bestmove, ponder); -} - -// Main iterative deepening loop. It calls search() -// repeatedly with increasing depth until the allocated thinking time has been -// consumed, the user stops the search, or the maximum search depth is reached. -void Search::Worker::iterative_deepening() { - - SearchManager* mainThread = (is_mainthread() ? main_manager() : nullptr); - - Move pv[MAX_PLY + 1]; - - Depth lastBestMoveDepth = 0; - Value lastBestScore = -VALUE_INFINITE; - auto lastBestPV = std::vector{Move::none()}; - - Value alpha, beta; - Value bestValue = -VALUE_INFINITE; - Color us = rootPos.side_to_move(); - double timeReduction = 1, totBestMoveChanges = 0; - int delta, iterIdx = 0; - - // Allocate stack with extra size to allow access from (ss - 7) to (ss + 2): - // (ss - 7) is needed for update_continuation_histories(ss - 1) which accesses (ss - 6), - // (ss + 2) is needed for initialization of cutOffCnt. 
- Stack stack[MAX_PLY + 10] = {}; - Stack* ss = stack + 7; - - for (int i = 7; i > 0; --i) - { - (ss - i)->continuationHistory = - &continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel - (ss - i)->continuationCorrectionHistory = &continuationCorrectionHistory[NO_PIECE][0]; - (ss - i)->staticEval = VALUE_NONE; - } - - for (int i = 0; i <= MAX_PLY + 2; ++i) - (ss + i)->ply = i; - - ss->pv = pv; - - if (mainThread) - { - if (mainThread->bestPreviousScore == VALUE_INFINITE) - mainThread->iterValue.fill(VALUE_ZERO); - else - mainThread->iterValue.fill(mainThread->bestPreviousScore); - } - - size_t multiPV = size_t(options["MultiPV"]); - Skill skill(options["Skill Level"], options["UCI_LimitStrength"] ? int(options["UCI_Elo"]) : 0); - - // When playing with strength handicap enable MultiPV search that we will - // use behind-the-scenes to retrieve a set of possible moves. - if (skill.enabled()) - multiPV = std::max(multiPV, size_t(4)); - - multiPV = std::min(multiPV, rootMoves.size()); - - int searchAgainCounter = 0; - - lowPlyHistory.fill(100); - - for (Color c : {WHITE, BLACK}) - for (int i = 0; i < UINT_16_HISTORY_SIZE; i++) - mainHistory[c][i] = mainHistory[c][i] * 778 / 1024; - - // Iterative deepening loop until requested to stop or the target depth is reached - while (++rootDepth < MAX_PLY && !threads.stop - && !(limits.depth && mainThread && rootDepth > limits.depth)) - { - // Age out PV variability metric - if (mainThread) - totBestMoveChanges /= 2; - - // Save the last iteration's scores before the first PV line is searched and - // all the move scores except the (new) PV are set to -VALUE_INFINITE. - for (RootMove& rm : rootMoves) - rm.previousScore = rm.score; - - size_t pvFirst = 0; - pvLast = 0; - - if (!threads.increaseDepth) - searchAgainCounter++; - - // MultiPV loop. 
We perform a full root search for each PV line - for (pvIdx = 0; pvIdx < multiPV; ++pvIdx) - { - if (pvIdx == pvLast) - { - pvFirst = pvLast; - for (pvLast++; pvLast < rootMoves.size(); pvLast++) - if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank) - break; - } - - // Reset UCI info selDepth for each depth and each PV line - selDepth = 0; - - // Reset aspiration window starting size - delta = 5 + threadIdx % 8 + std::abs(rootMoves[pvIdx].meanSquaredScore) / 9968; - Value avg = rootMoves[pvIdx].averageScore; - alpha = std::max(avg - delta, -VALUE_INFINITE); - beta = std::min(avg + delta, VALUE_INFINITE); - - // Adjust optimism based on root move's averageScore - optimism[us] = 142 * avg / (std::abs(avg) + 86); - optimism[~us] = -optimism[us]; - - // Start with a small aspiration window and, in the case of a fail - // high/low, re-search with a bigger window until we don't fail - // high/low anymore. - int failedHighCnt = 0; - while (true) - { - // Adjust the effective depth searched, but ensure at least one - // effective increment for every four searchAgain steps (see issue #2717). - Depth adjustedDepth = - std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4); - rootDelta = beta - alpha; - bestValue = search(rootPos, ss, alpha, beta, adjustedDepth, false); - - // Bring the best move to the front. It is critical that sorting - // is done with a stable algorithm because all the values but the - // first and eventually the new best one is set to -VALUE_INFINITE - // and we want to keep the same order for all the moves except the - // new PV that goes to the front. Note that in the case of MultiPV - // search the already searched PV lines are preserved. - std::stable_sort(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast); - - // If search has been stopped, we break immediately. Sorting is - // safe because RootMoves is still valid, although it refers to - // the previous iteration. 
- if (threads.stop) - break; - - // When failing high/low give some update before a re-search. To avoid - // excessive output that could hang GUIs like Fritz 19, only start - // at nodes > 10M (rather than depth N, which can be reached quickly) - if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta) - && nodes > 10000000) - main_manager()->pv(*this, threads, tt, rootDepth); - - // In case of failing low/high increase aspiration window and re-search, - // otherwise exit the loop. - if (bestValue <= alpha) - { - beta = alpha; - alpha = std::max(bestValue - delta, -VALUE_INFINITE); - - failedHighCnt = 0; - if (mainThread) - mainThread->stopOnPonderhit = false; - } - else if (bestValue >= beta) - { - alpha = std::max(beta - delta, alpha); - beta = std::min(bestValue + delta, VALUE_INFINITE); - ++failedHighCnt; - } - else - break; - - delta += delta / 3; - - assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); - } - - // Sort the PV lines searched so far and update the GUI - std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); - - if (mainThread - && (threads.stop || pvIdx + 1 == multiPV || nodes > 10000000) - // A thread that aborted search can have a mated-in/TB-loss score and - // PV that cannot be trusted, i.e. it can be delayed or refuted if we - // would have had time to fully search other root-moves. Thus here we - // suppress any exact mated-in/TB loss output and, if we do, below pick - // the score/PV from the previously completed iteration with the most - // recent bestmove change. - && !(threads.stop && is_loss(rootMoves[0].uciScore) - && rootMoves[0].score == rootMoves[0].uciScore)) - main_manager()->pv(*this, threads, tt, rootDepth); - - if (threads.stop) - break; - } - - if (!threads.stop) - completedDepth = rootDepth; - - // We make sure not to pick an unproven mated-in score, - // in case this thread prematurely stopped search (aborted-search). 
- if (completedDepth != rootDepth && rootMoves[0].score != -VALUE_INFINITE - && is_loss(rootMoves[0].score)) - { - // Bring the last best move to the front for best thread selection. - Utility::move_to_front(rootMoves, [&lastBestPV = std::as_const(lastBestPV)]( - const auto& rm) { return rm == lastBestPV[0]; }); - rootMoves[0].pv = lastBestPV; - rootMoves[0].score = rootMoves[0].uciScore = lastBestScore; - } - else if (rootMoves[0].pv[0] != lastBestPV[0]) - { - lastBestPV = rootMoves[0].pv; - lastBestScore = rootMoves[0].score; - lastBestMoveDepth = rootDepth; - } - - if (!mainThread) - continue; - - // Have we found a "mate in x"? - if (limits.mate && rootMoves[0].score == rootMoves[0].uciScore - && ((rootMoves[0].score >= VALUE_MATE_IN_MAX_PLY - && VALUE_MATE - rootMoves[0].score <= 2 * limits.mate) - || (rootMoves[0].score != -VALUE_INFINITE - && rootMoves[0].score <= VALUE_MATED_IN_MAX_PLY - && VALUE_MATE + rootMoves[0].score <= 2 * limits.mate))) - threads.stop = true; - - // If the skill level is enabled and time is up, pick a sub-optimal best move - if (skill.enabled() && skill.time_to_pick(rootDepth)) - skill.pick_best(rootMoves, multiPV); - - // Use part of the gained time from a previous stable move for the current move - for (auto&& th : threads) - { - totBestMoveChanges += th->worker->bestMoveChanges; - th->worker->bestMoveChanges = 0; - } - - // Do we have time for the next iteration? Can we stop searching now? 
- if (limits.use_time_management() && !threads.stop && !mainThread->stopOnPonderhit) - { - uint64_t nodesEffort = - rootMoves[0].effort * 100000 / std::max(size_t(1), size_t(nodes)); - - double fallingEval = (11.85 + 2.24 * (mainThread->bestPreviousAverageScore - bestValue) - + 0.93 * (mainThread->iterValue[iterIdx] - bestValue)) - / 100.0; - fallingEval = std::clamp(fallingEval, 0.57, 1.70); - - // If the bestMove is stable over several iterations, reduce time accordingly - double k = 0.51; - double center = lastBestMoveDepth + 12.15; - - timeReduction = 0.66 + 0.85 / (0.98 + std::exp(-k * (completedDepth - center))); - - double reduction = (1.43 + mainThread->previousTimeReduction) / (2.28 * timeReduction); - - double bestMoveInstability = 1.02 + 2.14 * totBestMoveChanges / threads.size(); - - double highBestMoveEffort = nodesEffort >= 93340 ? 0.76 : 1.0; - - double totalTime = mainThread->tm.optimum() * fallingEval * reduction - * bestMoveInstability * highBestMoveEffort; - - // Cap used time in case of a single legal move for a better viewer experience - if (rootMoves.size() == 1) - totalTime = std::min(502.0, totalTime); - - auto elapsedTime = elapsed(); - - // Stop the search if we have exceeded the totalTime or maximum - if (elapsedTime > std::min(totalTime, double(mainThread->tm.maximum()))) - { - // If we are allowed to ponder do not stop the search now but - // keep pondering until the GUI sends "ponderhit" or "stop". 
- if (mainThread->ponder) - mainThread->stopOnPonderhit = true; - else - threads.stop = true; - } - else - threads.increaseDepth = mainThread->ponder || elapsedTime <= totalTime * 0.50; - } - - mainThread->iterValue[iterIdx] = bestValue; - iterIdx = (iterIdx + 1) & 3; - } - - if (!mainThread) - return; - - mainThread->previousTimeReduction = timeReduction; - - // If the skill level is enabled, swap the best PV line with the sub-optimal one - if (skill.enabled()) - std::swap(rootMoves[0], - *std::find(rootMoves.begin(), rootMoves.end(), - skill.best ? skill.best : skill.pick_best(rootMoves, multiPV))); -} - - -void Search::Worker::do_move(Position& pos, const Move move, StateInfo& st, Stack* const ss) { - do_move(pos, move, st, pos.gives_check(move), ss); -} - -void Search::Worker::do_move( - Position& pos, const Move move, StateInfo& st, const bool givesCheck, Stack* const ss) { - bool capture = pos.capture_stage(move); - // Preferable over fetch_add to avoid locking instructions - nodes.store(nodes.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); - - auto [dirtyPiece, dirtyThreats] = accumulatorStack.push(); - pos.do_move(move, st, givesCheck, dirtyPiece, dirtyThreats, &tt, &sharedHistory); - - if (ss != nullptr) - { - ss->currentMove = move; - ss->continuationHistory = - &continuationHistory[ss->inCheck][capture][dirtyPiece.pc][move.to_sq()]; - ss->continuationCorrectionHistory = - &continuationCorrectionHistory[dirtyPiece.pc][move.to_sq()]; - } -} - -void Search::Worker::do_null_move(Position& pos, StateInfo& st, Stack* const ss) { - pos.do_null_move(st); - ss->currentMove = Move::null(); - ss->continuationHistory = &continuationHistory[0][0][NO_PIECE][0]; - ss->continuationCorrectionHistory = &continuationCorrectionHistory[NO_PIECE][0]; -} - -void Search::Worker::undo_move(Position& pos, const Move move) { - pos.undo_move(move); - accumulatorStack.pop(); -} - -void Search::Worker::undo_null_move(Position& pos) { pos.undo_null_move(); } - - -// 
Reset histories, usually before a new game -void Search::Worker::clear() { - mainHistory.fill(0); - captureHistory.fill(-689); - - // Each thread is responsible for clearing their part of shared history - sharedHistory.correctionHistory.clear_range(0, numaThreadIdx, numaTotal); - sharedHistory.pawnHistory.clear_range(-1238, numaThreadIdx, numaTotal); - - ttMoveHistory = 0; - - for (auto& to : continuationCorrectionHistory) - for (auto& h : to) - h.fill(7); - - for (bool inCheck : {false, true}) - for (StatsType c : {NoCaptures, Captures}) - for (auto& to : continuationHistory[inCheck][c]) - for (auto& h : to) - h.fill(-541); - - for (size_t i = 1; i < reductions.size(); ++i) - reductions[i] = int(2809 / 128.0 * std::log(i)); - - refreshTable.clear(networks[numaAccessToken]); -} - - -// Main search function for both PV and non-PV nodes -template -Value Search::Worker::search( - Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { - - constexpr bool PvNode = nodeType != NonPV; - constexpr bool rootNode = nodeType == Root; - const bool allNode = !(PvNode || cutNode); - - // Dive into quiescence search when the depth reaches zero - if (depth <= 0) - return qsearch(pos, ss, alpha, beta); - - // Limit the depth if extensions made it too large - depth = std::min(depth, MAX_PLY - 1); - - // Check if we have an upcoming move that draws by repetition - if (!rootNode && alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply)) - { - alpha = value_draw(nodes); - if (alpha >= beta) - return alpha; - } - - assert(-VALUE_INFINITE <= alpha && alpha < beta && beta <= VALUE_INFINITE); - assert(PvNode || (alpha == beta - 1)); - assert(0 < depth && depth < MAX_PLY); - assert(!(PvNode && cutNode)); - - Move pv[MAX_PLY + 1]; - StateInfo st; - - Key posKey; - Move move, excludedMove, bestMove; - Depth extension, newDepth; - Value bestValue, value, eval, maxValue, probCutBeta; - bool givesCheck, improving, priorCapture, opponentWorsening; - bool capture, 
ttCapture; - int priorReduction; - Piece movedPiece; - - SearchedList capturesSearched; - SearchedList quietsSearched; - - // Step 1. Initialize node - ss->inCheck = pos.checkers(); - priorCapture = pos.captured_piece(); - Color us = pos.side_to_move(); - ss->moveCount = 0; - bestValue = -VALUE_INFINITE; - maxValue = VALUE_INFINITE; - - // Check for the available remaining time - if (is_mainthread()) - main_manager()->check_time(*this); - - // Used to send selDepth info to GUI (selDepth counts from 1, ply from 0) - if (PvNode && selDepth < ss->ply + 1) - selDepth = ss->ply + 1; - - if (!rootNode) - { - // Step 2. Check for aborted search and immediate draw - if (threads.stop.load(std::memory_order_relaxed) || pos.is_draw(ss->ply) - || ss->ply >= MAX_PLY) - return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : value_draw(nodes); - - // Step 3. Mate distance pruning. Even if we mate at the next move our score - // would be at best mate_in(ss->ply + 1), but if alpha is already bigger because - // a shorter mate was found upward in the tree then there is no need to search - // because we will never beat the current alpha. Same logic but with reversed - // signs apply also in the opposite condition of being mated instead of giving - // mate. In this case, return a fail-high score. - alpha = std::max(mated_in(ss->ply), alpha); - beta = std::min(mate_in(ss->ply + 1), beta); - if (alpha >= beta) - return alpha; - } - - assert(0 <= ss->ply && ss->ply < MAX_PLY); - - Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; - bestMove = Move::none(); - priorReduction = (ss - 1)->reduction; - (ss - 1)->reduction = 0; - ss->statScore = 0; - (ss + 2)->cutoffCnt = 0; - - // Step 4. Transposition table lookup - excludedMove = ss->excludedMove; - posKey = pos.key(); - auto [ttHit, ttData, ttWriter] = tt.probe(posKey); - // Need further processing of the saved data - ss->ttHit = ttHit; - ttData.move = rootNode ? 
rootMoves[pvIdx].pv[0] : ttHit ? ttData.move : Move::none(); - ttData.value = ttHit ? value_from_tt(ttData.value, ss->ply, pos.rule50_count()) : VALUE_NONE; - ss->ttPv = excludedMove ? ss->ttPv : PvNode || (ttHit && ttData.is_pv); - ttCapture = ttData.move && pos.capture_stage(ttData.move); - - // Step 6. Static evaluation of the position - Value unadjustedStaticEval = VALUE_NONE; - const auto correctionValue = correction_value(*this, pos, ss); - // Skip early pruning when in check - if (ss->inCheck) - ss->staticEval = eval = (ss - 2)->staticEval; - else if (excludedMove) - unadjustedStaticEval = eval = ss->staticEval; - else if (ss->ttHit) - { - // Never assume anything about values stored in TT - unadjustedStaticEval = ttData.eval; - if (!is_valid(unadjustedStaticEval)) - unadjustedStaticEval = evaluate(pos); - - ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, correctionValue); - - // ttValue can be used as a better position evaluation - if (is_valid(ttData.value) - && (ttData.bound & (ttData.value > eval ? BOUND_LOWER : BOUND_UPPER))) - eval = ttData.value; - } - else - { - unadjustedStaticEval = evaluate(pos); - ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, correctionValue); - - // Static evaluation is saved as it was before adjustment by correction history - ttWriter.write(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_UNSEARCHED, Move::none(), - unadjustedStaticEval, tt.generation()); - } - - // Set up the improving flag, which is true if current static evaluation is - // bigger than the previous static evaluation at our turn (if we were in - // check at our previous move we go back until we weren't in check) and is - // false otherwise. The improving flag is used in various pruning heuristics. - // Similarly, opponentWorsening is true if our static evaluation is better - // for us than at the last ply. 
- improving = ss->staticEval > (ss - 2)->staticEval; - opponentWorsening = ss->staticEval > -(ss - 1)->staticEval; - - // Hindsight adjustment of reductions based on static evaluation difference. - if (priorReduction >= 3 && !opponentWorsening) - depth++; - if (priorReduction >= 2 && depth >= 2 && ss->staticEval + (ss - 1)->staticEval > 188) - depth--; - - // At non-PV nodes we check for an early TT cutoff - if (!PvNode && !excludedMove && ttData.depth > depth - (ttData.value <= beta) - && is_valid(ttData.value) // Can happen when !ttHit or when access race in probe() - && (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER)) - && (cutNode == (ttData.value >= beta) || depth > 5)) - { - // If ttMove is quiet, update move sorting heuristics on TT hit - if (ttData.move && ttData.value >= beta) - { - // Bonus for a quiet ttMove that fails high - if (!ttCapture) - update_quiet_histories(pos, ss, *this, ttData.move, - std::min(121 * depth - 75, 932)); - - // Extra penalty for early quiet moves of the previous ply - if (prevSq != SQ_NONE && (ss - 1)->moveCount < 4 && !priorCapture) - update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -2104); - } - - // Partial workaround for the graph history interaction problem - // For high rule50 counts don't produce transposition table cutoffs. - if (pos.rule50_count() < 96) - { - if (depth >= 7 && ttData.move && pos.pseudo_legal(ttData.move) && pos.legal(ttData.move) - && !is_decisive(ttData.value)) - { - pos.do_move(ttData.move, st); - Key nextPosKey = pos.key(); - auto [ttHitNext, ttDataNext, ttWriterNext] = tt.probe(nextPosKey); - pos.undo_move(ttData.move); - - // Check that the ttValue after the tt move would also trigger a cutoff - if (!is_valid(ttDataNext.value)) - return ttData.value; - - if ((ttData.value >= beta) == (-ttDataNext.value >= beta)) - return ttData.value; - } - else - return ttData.value; - } - } - - // Step 5. 
Tablebases probe - if (!rootNode && !excludedMove && tbConfig.cardinality) - { - int piecesCount = pos.count(); - - if (piecesCount <= tbConfig.cardinality - && (piecesCount < tbConfig.cardinality || depth >= tbConfig.probeDepth) - && pos.rule50_count() == 0 && !pos.can_castle(ANY_CASTLING)) - { - TB::ProbeState err; - TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err); - - // Force check of time on the next occasion - if (is_mainthread()) - main_manager()->callsCnt = 0; - - if (err != TB::ProbeState::FAIL) - { - // Preferable over fetch_add to avoid locking instructions - tbHits.store(tbHits.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); - - int drawScore = tbConfig.useRule50 ? 1 : 0; - - Value tbValue = VALUE_TB - ss->ply; - - // Use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score - value = wdl < -drawScore ? -tbValue - : wdl > drawScore ? tbValue - : VALUE_DRAW + 2 * wdl * drawScore; - - Bound b = wdl < -drawScore ? BOUND_UPPER - : wdl > drawScore ? BOUND_LOWER - : BOUND_EXACT; - - if (b == BOUND_EXACT || (b == BOUND_LOWER ? value >= beta : value <= alpha)) - { - ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, - std::min(MAX_PLY - 1, depth + 6), Move::none(), VALUE_NONE, - tt.generation()); - - return value; - } - - if (PvNode) - { - if (b == BOUND_LOWER) - bestValue = value, alpha = std::max(alpha, bestValue); - else - maxValue = value; - } - } - } - } - - if (ss->inCheck) - goto moves_loop; - - // Use static evaluation difference to improve quiet move ordering - if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) - { - int evalDiff = std::clamp(-int((ss - 1)->staticEval + ss->staticEval), -213, 175) + 59; - mainHistory[~us][((ss - 1)->currentMove).raw()] << evalDiff * 10; - if (!ttHit && type_of(pos.piece_on(prevSq)) != PAWN - && ((ss - 1)->currentMove).type_of() != PROMOTION) - sharedHistory.pawn_entry(pos)[pos.piece_on(prevSq)][prevSq] << evalDiff * 13; - } - - - // Step 7. 
Razoring - // If eval is really low, skip search entirely and return the qsearch value. - // For PvNodes, we must have a guard against mates being returned. - if (!PvNode && eval < alpha - 507 - 312 * depth * depth) - return qsearch(pos, ss, alpha, beta); - - // Step 8. Futility pruning: child node - // The depth condition is important for mate finding. - { - auto futility_margin = [&](Depth d) { - Value futilityMult = 77 - 22 * !ss->ttHit; - - return futilityMult * d - - (2661 * improving + 355 * opponentWorsening) * futilityMult / 1024 // - + std::abs(correctionValue) / 176900; - }; - - if (!ss->ttPv && depth < 16 && eval - futility_margin(depth) >= beta && eval >= beta - && (!ttData.move || ttCapture) && !is_loss(beta) && !is_win(eval)) - return (2 * beta + eval) / 3; - } - - // Step 9. Null move search with verification search - if (cutNode && ss->staticEval >= beta - 17 * depth - 50 * improving + 359 && !excludedMove - && pos.non_pawn_material(us) && ss->ply >= nmpMinPly && !is_loss(beta)) - { - assert((ss - 1)->currentMove != Move::null()); - - // Null move dynamic reduction based on depth - Depth R = 7 + depth / 3; - do_null_move(pos, st, ss); - - Value nullValue = -search(pos, ss + 1, -beta, -beta + 1, depth - R, false); - - undo_null_move(pos); - - // Do not return unproven mate or TB scores - if (nullValue >= beta && !is_win(nullValue)) - { - if (nmpMinPly || depth < 16) - return nullValue; - - assert(!nmpMinPly); // Recursive verification is not allowed - - // Do verification search at high depths, with null move pruning disabled - // until ply exceeds nmpMinPly. - nmpMinPly = ss->ply + 3 * (depth - R) / 4; - - Value v = search(pos, ss, beta - 1, beta, depth - R, false); - - nmpMinPly = 0; - - if (v >= beta) - return nullValue; - } - } - - improving |= ss->staticEval >= beta; - - // Step 10. Internal iterative reductions - // At sufficient depth, reduce depth for PV/Cut nodes without a TTMove. - // (*Scaler) Making IIR more aggressive scales poorly. 
- if (!allNode && depth >= 6 && !ttData.move && priorReduction <= 3) - depth--; - - // Step 11. ProbCut - // If we have a good enough capture (or queen promotion) and a reduced search - // returns a value much above beta, we can (almost) safely prune the previous move. - probCutBeta = beta + 229 - 63 * improving; - if (depth >= 3 - && !is_decisive(beta) - // If value from transposition table is lower than probCutBeta, don't attempt - // probCut there - && !(is_valid(ttData.value) && ttData.value < probCutBeta)) - { - assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta); - - MovePicker mp(pos, ttData.move, probCutBeta - ss->staticEval, &captureHistory); - Depth probCutDepth = depth - 4; - - while ((move = mp.next_move()) != Move::none()) - { - assert(move.is_ok()); - - if (move == excludedMove || !pos.legal(move)) - continue; - - assert(pos.capture_stage(move)); - - do_move(pos, move, st, ss); - - // Perform a preliminary qsearch to verify that the move holds - value = -qsearch(pos, ss + 1, -probCutBeta, -probCutBeta + 1); - - // If the qsearch held, perform the regular search - if (value >= probCutBeta && probCutDepth > 0) - value = -search(pos, ss + 1, -probCutBeta, -probCutBeta + 1, probCutDepth, - !cutNode); - - undo_move(pos, move); - - if (value >= probCutBeta) - { - // Save ProbCut data into transposition table - ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, - probCutDepth + 1, move, unadjustedStaticEval, tt.generation()); - - if (!is_decisive(value)) - return value - (probCutBeta - beta); - } - } - } - -moves_loop: // When in check, search starts here - - // Step 12. 
A small Probcut idea - probCutBeta = beta + 416; - if ((ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 && ttData.value >= probCutBeta - && !is_decisive(beta) && is_valid(ttData.value) && !is_decisive(ttData.value)) - return probCutBeta; - - const PieceToHistory* contHist[] = { - (ss - 1)->continuationHistory, (ss - 2)->continuationHistory, (ss - 3)->continuationHistory, - (ss - 4)->continuationHistory, (ss - 5)->continuationHistory, (ss - 6)->continuationHistory}; - - - MovePicker mp(pos, ttData.move, depth, &mainHistory, &lowPlyHistory, &captureHistory, contHist, - &sharedHistory, ss->ply); - - value = bestValue; - - int moveCount = 0; - - // Step 13. Loop through all pseudo-legal moves until no moves remain - // or a beta cutoff occurs. - while ((move = mp.next_move()) != Move::none()) - { - assert(move.is_ok()); - - if (move == excludedMove) - continue; - - // Check for legality - if (!pos.legal(move)) - continue; - - // At root obey the "searchmoves" option and skip moves not listed in Root - // Move List. In MultiPV mode we also skip PV moves that have been already - // searched and those of lower "TB rank" if we are in a TB root position. - if (rootNode && !std::count(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast, move)) - continue; - - ss->moveCount = ++moveCount; - - if (rootNode && is_mainthread() && nodes > 10000000) - { - main_manager()->updates.onIter( - {depth, UCIEngine::move(move, pos.is_chess960()), moveCount + pvIdx}); - } - if (PvNode) - (ss + 1)->pv = nullptr; - - extension = 0; - capture = pos.capture_stage(move); - movedPiece = pos.moved_piece(move); - givesCheck = pos.gives_check(move); - - // Calculate new depth for this move - newDepth = depth - 1; - - int delta = beta - alpha; - - Depth r = reduction(improving, depth, moveCount, delta); - - // Increase reduction for ttPv nodes (*Scaler) - // Larger values scale well - if (ss->ttPv) - r += 949; - - // Step 14. Pruning at shallow depths. 
- // Depth conditions are important for mate finding. - if (!rootNode && pos.non_pawn_material(us) && !is_loss(bestValue)) - { - // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold - if (moveCount >= (3 + depth * depth) / (2 - improving)) - mp.skip_quiet_moves(); - - // Reduced depth of the next LMR search - int lmrDepth = newDepth - r / 1024; - - if (capture || givesCheck) - { - Piece capturedPiece = pos.piece_on(move.to_sq()); - int captHist = captureHistory[movedPiece][move.to_sq()][type_of(capturedPiece)]; - - // Futility pruning for captures - if (!givesCheck && lmrDepth < 7) - { - Value futilityValue = ss->staticEval + 235 + 211 * lmrDepth - + PieceValue[capturedPiece] + 126 * captHist / 1024; - - if (futilityValue <= alpha) - continue; - } - - // SEE based pruning for captures and checks - // Avoid pruning sacrifices of our last piece for stalemate - int margin = std::max(185 * depth + captHist / 28, 0); - if ((alpha >= VALUE_DRAW || pos.non_pawn_material(us) != PieceValue[movedPiece]) - && !pos.see_ge(move, -margin)) - continue; - } - else - { - int history = (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()] - + sharedHistory.pawn_entry(pos)[movedPiece][move.to_sq()]; - - // Continuation history based pruning - if (history < -3826 * depth) - continue; - - history += 73 * mainHistory[us][move.raw()] / 32; - - // (*Scaler): Generally, lower divisors scales well - lmrDepth += history / 2917; - - Value futilityValue = ss->staticEval + 42 + 157 * !bestMove + 120 * lmrDepth - + 86 * (ss->staticEval > alpha); - - // Futility pruning: parent node - // (*Scaler): Generally, more frequent futility pruning - // scales well - if (!ss->inCheck && lmrDepth < 13 && futilityValue <= alpha) - { - if (bestValue <= futilityValue && !is_decisive(bestValue) - && !is_win(futilityValue)) - bestValue = futilityValue; - continue; - } - - lmrDepth = std::max(lmrDepth, 0); - - // Prune moves with negative SEE - if 
(!pos.see_ge(move, -25 * lmrDepth * lmrDepth)) - continue; - } - } - - // Step 15. Extensions - // Singular extension search. If all moves but one - // fail low on a search of (alpha-s, beta-s), and just one fails high on - // (alpha, beta), then that move is singular and should be extended. To - // verify this we do a reduced search on the position excluding the ttMove - // and if the result is lower than ttValue minus a margin, then we will - // extend the ttMove. Recursive singular search is avoided. - - // (*Scaler) Generally, higher singularBeta (i.e closer to ttValue) - // and lower extension margins scale well. - if (!rootNode && move == ttData.move && !excludedMove && depth >= 6 + ss->ttPv - && is_valid(ttData.value) && !is_decisive(ttData.value) && (ttData.bound & BOUND_LOWER) - && ttData.depth >= depth - 3 && !is_shuffling(move, ss, pos)) - { - Value singularBeta = ttData.value - (58 + 67 * (ss->ttPv && !PvNode)) * depth / 57; - Depth singularDepth = newDepth / 2; - - ss->excludedMove = move; - value = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); - ss->excludedMove = Move::none(); - - if (value < singularBeta) - { - int corrValAdj = std::abs(correctionValue) / 220870; - int doubleMargin = -4 + 213 * PvNode - 196 * !ttCapture - corrValAdj - - 943 * ttMoveHistory / 123477 - (ss->ply > rootDepth) * 45; - int tripleMargin = 73 + 324 * PvNode - 229 * !ttCapture + 87 * ss->ttPv - corrValAdj - - (ss->ply > rootDepth) * 50; - - extension = - 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin); - - depth++; - } - - // Multi-cut pruning - // Our ttMove is assumed to fail high based on the bound of the TT entry, - // and if after excluding the ttMove with a reduced search we fail high - // over the original beta, we assume this expected cut-node is not - // singular (multiple moves fail high), and we can prune the whole - // subtree by returning a softbound. 
- else if (value >= beta && !is_decisive(value)) - { - ttMoveHistory << std::max(-394 - 105 * depth, -3692); - return value; - } - - // Negative extensions - // If other moves failed high over (ttValue - margin) without the - // ttMove on a reduced search, but we cannot do multi-cut because - // (ttValue - margin) is lower than the original beta, we do not know - // if the ttMove is singular or can do a multi-cut, so we reduce the - // ttMove in favor of other moves based on some conditions: - - // If the ttMove is assumed to fail high over current beta - else if (ttData.value >= beta) - extension = -3; - - // If we are on a cutNode but the ttMove is not assumed to fail high - // over current beta - else if (cutNode) - extension = -2; - } - - // Step 16. Make the move - do_move(pos, move, st, givesCheck, ss); - - // Add extension to new depth - newDepth += extension; - uint64_t nodeCount = rootNode ? uint64_t(nodes) : 0; - - // Decrease reduction for PvNodes (*Scaler) - if (ss->ttPv) - r -= 2823 + PvNode * 1013 + (ttData.value > alpha) * 910 - + (ttData.depth >= depth) * (933 + cutNode * 979); - - r += 690; // Base reduction offset to compensate for other tweaks - r -= moveCount * 70; - r -= std::abs(correctionValue) / 26878; - - // Increase reduction for cut nodes - if (cutNode) - r += 3582 + 1015 * !ttData.move; - - // Increase reduction if ttMove is a capture - if (ttCapture) - r += 1075; - - // Increase reduction if next ply has a lot of fail high - if ((ss + 1)->cutoffCnt > 1) - r += 249 + 1073 * ((ss + 1)->cutoffCnt > 2) + 1064 * allNode; - - // For first picked move (ttMove) reduce reduction - if (move == ttData.move) - r -= 2069; - - if (capture) - ss->statScore = 892 * int(PieceValue[pos.captured_piece()]) / 128 - + captureHistory[movedPiece][move.to_sq()][type_of(pos.captured_piece())]; - else - ss->statScore = 2 * mainHistory[us][move.raw()] - + (*contHist[0])[movedPiece][move.to_sq()] - + (*contHist[1])[movedPiece][move.to_sq()]; - - // 
Decrease/increase reduction for moves with a good/bad history - r -= ss->statScore * 454 / 4096; - - // Scale up reductions for expected ALL nodes - if (allNode) - r += r * 276 / (256 * depth + 254); - - // Step 17. Late moves reduction / extension (LMR) - if (depth >= 2 && moveCount > 1) - { - // In general we want to cap the LMR depth search at newDepth, but when - // reduction is negative, we allow this move a limited search extension - // beyond the first move depth. - // To prevent problems when the max value is less than the min value, - // std::clamp has been replaced by a more robust implementation. - Depth d = std::max(1, std::min(newDepth - r / 1024, newDepth + 2)) + PvNode; - - ss->reduction = newDepth - d; - value = -search(pos, ss + 1, -(alpha + 1), -alpha, d, true); - ss->reduction = 0; - - // Do a full-depth search when reduced LMR search fails high - // (*Scaler) Shallower searches here don't scale well - if (value > alpha) - { - // Adjust full-depth search based on LMR results - if the result was - // good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = d < newDepth && value > bestValue + 50; - const bool doShallowerSearch = value < bestValue + 9; - - newDepth += doDeeperSearch - doShallowerSearch; - - if (newDepth > d) - value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth, !cutNode); - - // Post LMR continuation history updates - update_continuation_histories(ss, movedPiece, move.to_sq(), 1342); - } - } - - // Step 18. 
Full-depth search when LMR is skipped - else if (!PvNode || moveCount > 1) - { - // Increase reduction if ttMove is not present - if (!ttData.move) - r += 993; - - // Note that if expected reduction is high, we reduce search depth here - value = -search(pos, ss + 1, -(alpha + 1), -alpha, - newDepth - (r > 4302) - (r > 5919 && newDepth > 2), !cutNode); - } - - // For PV nodes only, do a full PV search on the first move or after a fail high, - // otherwise let the parent node fail low with value <= alpha and try another move. - if (PvNode && (moveCount == 1 || value > alpha)) - { - (ss + 1)->pv = pv; - (ss + 1)->pv[0] = Move::none(); - - // Extend move from transposition table if we are about to dive into qsearch. - // decisive score handling improves mate finding and retrograde analysis. - if (move == ttData.move - && ((is_valid(ttData.value) && is_decisive(ttData.value) && ttData.depth > 0) - || ttData.depth > 1)) - newDepth = std::max(newDepth, 1); - - value = -search(pos, ss + 1, -beta, -alpha, newDepth, false); - } - - // Step 19. Undo move - undo_move(pos, move); - - assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - - // Step 20. Check for a new best move - // Finished searching the move. If a stop occurred, the return value of - // the search cannot be trusted, and we return immediately without updating - // best move, principal variation nor transposition table. - if (threads.stop.load(std::memory_order_relaxed)) - return VALUE_ZERO; - - if (rootNode) - { - RootMove& rm = *std::find(rootMoves.begin(), rootMoves.end(), move); - - rm.effort += nodes - nodeCount; - - rm.averageScore = - rm.averageScore != -VALUE_INFINITE ? (value + rm.averageScore) / 2 : value; - - rm.meanSquaredScore = rm.meanSquaredScore != -VALUE_INFINITE * VALUE_INFINITE - ? (value * std::abs(value) + rm.meanSquaredScore) / 2 - : value * std::abs(value); - - // PV move or new best move? 
- if (moveCount == 1 || value > alpha) - { - rm.score = rm.uciScore = value; - rm.selDepth = selDepth; - rm.scoreLowerbound = rm.scoreUpperbound = false; - - if (value >= beta) - { - rm.scoreLowerbound = true; - rm.uciScore = beta; - } - else if (value <= alpha) - { - rm.scoreUpperbound = true; - rm.uciScore = alpha; - } - - rm.pv.resize(1); - - assert((ss + 1)->pv); - - for (Move* m = (ss + 1)->pv; *m != Move::none(); ++m) - rm.pv.push_back(*m); - - // We record how often the best move has been changed in each iteration. - // This information is used for time management. In MultiPV mode, - // we must take care to only do this for the first PV line. - if (moveCount > 1 && !pvIdx) - ++bestMoveChanges; - } - else - // All other moves but the PV, are set to the lowest value: this - // is not a problem when sorting because the sort is stable and the - // move position in the list is preserved - just the PV is pushed up. - rm.score = -VALUE_INFINITE; - } - - // In case we have an alternative move equal in eval to the current bestmove, - // promote it to bestmove by pretending it just exceeds alpha (but not beta). - int inc = (value == bestValue && ss->ply + 2 >= rootDepth && (int(nodes) & 14) == 0 - && !is_win(std::abs(value) + 1)); - - if (value + inc > bestValue) - { - bestValue = value; - - if (value + inc > alpha) - { - bestMove = move; - - if (PvNode && !rootNode) // Update pv even in fail-high case - update_pv(ss->pv, move, (ss + 1)->pv); - - if (value >= beta) - { - // (*Scaler) Infrequent and small updates scale well - ss->cutoffCnt += (extension < 2) || PvNode; - assert(value >= beta); // Fail high - break; - } - - // Reduce other moves if we have found at least one score improvement - if (depth > 2 && depth < 14 && !is_decisive(value)) - depth -= 2; - - assert(depth > 0); - alpha = value; // Update alpha! Always alpha < beta - } - } - - // If the move is worse than some previously searched move, - // remember it, to update its stats later. 
- if (move != bestMove && moveCount <= SEARCHEDLIST_CAPACITY) - { - if (capture) - capturesSearched.push_back(move); - else - quietsSearched.push_back(move); - } - } - - // Step 21. Check for mate and stalemate - // All legal moves have been searched and if there are no legal moves, it - // must be a mate or a stalemate. If we are in a singular extension search then - // return a fail low score. - - assert(moveCount || !ss->inCheck || excludedMove || !MoveList(pos).size()); - - // Adjust best value for fail high cases - if (bestValue >= beta && !is_decisive(bestValue) && !is_decisive(alpha)) - bestValue = (bestValue * depth + beta) / (depth + 1); - - if (!moveCount) - bestValue = excludedMove ? alpha : ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW; - - // If there is a move that produces search value greater than alpha, - // we update the stats of searched moves. - else if (bestMove) - { - update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, capturesSearched, depth, - ttData.move); - if (!PvNode) - ttMoveHistory << (bestMove == ttData.move ? 
804 : -860); - } - - // Bonus for prior quiet countermove that caused the fail low - else if (!priorCapture && prevSq != SQ_NONE) - { - int bonusScale = -227; - bonusScale -= (ss - 1)->statScore / 101; - bonusScale += std::min(58 * depth, 488); - bonusScale += 172 * ((ss - 1)->moveCount > 8); - bonusScale += 150 * (!ss->inCheck && bestValue <= ss->staticEval - 113); - bonusScale += 154 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 68); - - bonusScale = std::max(bonusScale, 0); - - // scaledBonus ranges from 0 to roughly 2.3M, overflows happen for multipliers larger than 900 - const int scaledBonus = std::min(137 * depth - 79, 1394) * bonusScale; - - update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, - scaledBonus * 222 / 16384); - - mainHistory[~us][((ss - 1)->currentMove).raw()] << scaledBonus * 221 / 32768; - - if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) - sharedHistory.pawn_entry(pos)[pos.piece_on(prevSq)][prevSq] << scaledBonus * 286 / 8192; - } - - // Bonus for prior capture countermove that caused the fail low - else if (priorCapture && prevSq != SQ_NONE) - { - Piece capturedPiece = pos.captured_piece(); - assert(capturedPiece != NO_PIECE); - captureHistory[pos.piece_on(prevSq)][prevSq][type_of(capturedPiece)] << 993; - } - - if (PvNode) - bestValue = std::min(bestValue, maxValue); - - // If no good move is found and the previous position was ttPv, then the previous - // opponent move is probably good and the new position is added to the search tree. - if (bestValue <= alpha) - ss->ttPv = ss->ttPv || (ss - 1)->ttPv; - - // Write gathered information in transposition table. Note that the - // static evaluation is saved as it was before correction history. - if (!excludedMove && !(rootNode && pvIdx)) - ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, - bestValue >= beta ? BOUND_LOWER - : PvNode && bestMove ? BOUND_EXACT - : BOUND_UPPER, - moveCount != 0 ? 
depth : std::min(MAX_PLY - 1, depth + 6), bestMove, - unadjustedStaticEval, tt.generation()); - - // Adjust correction history if the best move is not a capture - // and the error direction matches whether we are above/below bounds. - if (!ss->inCheck && !(bestMove && pos.capture(bestMove)) - && (bestValue > ss->staticEval) == bool(bestMove)) - { - auto bonus = std::clamp(int(bestValue - ss->staticEval) * depth / (bestMove ? 10 : 8), - -CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4); - update_correction_history(pos, ss, *this, bonus); - } - - assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); - - return bestValue; -} - - -// Quiescence search function, which is called by the main search function with -// depth zero, or recursively with further decreasing depth. With depth <= 0, we -// "should" be using static eval only, but tactical moves may confuse the static eval. -// To fight this horizon effect, we implement this qsearch of tactical moves. -// See https://www.chessprogramming.org/Horizon_Effect -// and https://www.chessprogramming.org/Quiescence_Search -template -Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) { - - static_assert(nodeType != Root); - constexpr bool PvNode = nodeType == PV; - - assert(alpha >= -VALUE_INFINITE && alpha < beta && beta <= VALUE_INFINITE); - assert(PvNode || (alpha == beta - 1)); - - // Check if we have an upcoming move that draws by repetition - if (alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply)) - { - alpha = value_draw(nodes); - if (alpha >= beta) - return alpha; - } - - Move pv[MAX_PLY + 1]; - StateInfo st; - - Key posKey; - Move move, bestMove; - Value bestValue, value, futilityBase; - bool pvHit, givesCheck, capture; - int moveCount; - - // Step 1. 
Initialize node - if (PvNode) - { - (ss + 1)->pv = pv; - ss->pv[0] = Move::none(); - } - - bestMove = Move::none(); - ss->inCheck = pos.checkers(); - moveCount = 0; - - // Used to send selDepth info to GUI (selDepth counts from 1, ply from 0) - if (PvNode && selDepth < ss->ply + 1) - selDepth = ss->ply + 1; - - // Step 2. Check for an immediate draw or maximum ply reached - if (pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) - return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : VALUE_DRAW; - - assert(0 <= ss->ply && ss->ply < MAX_PLY); - - // Step 3. Transposition table lookup - posKey = pos.key(); - auto [ttHit, ttData, ttWriter] = tt.probe(posKey); - // Need further processing of the saved data - ss->ttHit = ttHit; - ttData.move = ttHit ? ttData.move : Move::none(); - ttData.value = ttHit ? value_from_tt(ttData.value, ss->ply, pos.rule50_count()) : VALUE_NONE; - pvHit = ttHit && ttData.is_pv; - - // At non-PV nodes we check for an early TT cutoff - if (!PvNode && ttData.depth >= DEPTH_QS - && is_valid(ttData.value) // Can happen when !ttHit or when access race in probe() - && (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER))) - return ttData.value; - - // Step 4. Static evaluation of the position - Value unadjustedStaticEval = VALUE_NONE; - if (ss->inCheck) - bestValue = futilityBase = -VALUE_INFINITE; - else - { - const auto correctionValue = correction_value(*this, pos, ss); - - if (ss->ttHit) - { - // Never assume anything about values stored in TT - unadjustedStaticEval = ttData.eval; - - if (!is_valid(unadjustedStaticEval)) - unadjustedStaticEval = evaluate(pos); - - ss->staticEval = bestValue = - to_corrected_static_eval(unadjustedStaticEval, correctionValue); - - // ttValue can be used as a better position evaluation - if (is_valid(ttData.value) && !is_decisive(ttData.value) - && (ttData.bound & (ttData.value > bestValue ? 
BOUND_LOWER : BOUND_UPPER))) - bestValue = ttData.value; - } - else - { - unadjustedStaticEval = evaluate(pos); - ss->staticEval = bestValue = - to_corrected_static_eval(unadjustedStaticEval, correctionValue); - } - - // Stand pat. Return immediately if static value is at least beta - if (bestValue >= beta) - { - if (!is_decisive(bestValue)) - bestValue = (bestValue + beta) / 2; - - if (!ss->ttHit) - ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, - DEPTH_UNSEARCHED, Move::none(), unadjustedStaticEval, - tt.generation()); - return bestValue; - } - - if (bestValue > alpha) - alpha = bestValue; - - futilityBase = ss->staticEval + 351; - } - - const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory}; - - Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; - - // Initialize a MovePicker object for the current position, and prepare to search - // the moves. We presently use two stages of move generator in quiescence search: - // captures, or evasions only when in check. - MovePicker mp(pos, ttData.move, DEPTH_QS, &mainHistory, &lowPlyHistory, &captureHistory, - contHist, &sharedHistory, ss->ply); - - // Step 5. Loop through all pseudo-legal moves until no moves remain or a beta - // cutoff occurs. - while ((move = mp.next_move()) != Move::none()) - { - assert(move.is_ok()); - - if (!pos.legal(move)) - continue; - - givesCheck = pos.gives_check(move); - capture = pos.capture_stage(move); - - moveCount++; - - // Step 6. Pruning - if (!is_loss(bestValue)) - { - // Futility pruning and moveCount pruning - if (!givesCheck && move.to_sq() != prevSq && !is_loss(futilityBase) - && move.type_of() != PROMOTION) - { - if (moveCount > 2) - continue; - - Value futilityValue = futilityBase + PieceValue[pos.piece_on(move.to_sq())]; - - // If static eval + value of piece we are going to capture is - // much lower than alpha, we can prune this move. 
- if (futilityValue <= alpha) - { - bestValue = std::max(bestValue, futilityValue); - continue; - } - - // If static exchange evaluation is low enough - // we can prune this move. - if (!pos.see_ge(move, alpha - futilityBase)) - { - bestValue = std::max(bestValue, std::min(alpha, futilityBase)); - continue; - } - } - - // Skip non-captures - if (!capture) - continue; - - // Do not search moves with bad enough SEE values - if (!pos.see_ge(move, -72)) - continue; - } - - // Step 7. Make and search the move - do_move(pos, move, st, givesCheck, ss); - - value = -qsearch(pos, ss + 1, -beta, -alpha); - undo_move(pos, move); - - assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - - // Step 8. Check for a new best move - if (value > bestValue) - { - bestValue = value; - - if (value > alpha) - { - bestMove = move; - - if (PvNode) // Update pv even in fail-high case - update_pv(ss->pv, move, (ss + 1)->pv); - - if (value < beta) // Update alpha here! - alpha = value; - else - break; // Fail high - } - } - } - - // Step 9. Check for mate - // All legal moves have been searched. A special case: if we are - // in check and no legal moves were found, it is checkmate. - if (ss->inCheck && bestValue == -VALUE_INFINITE) - { - assert(!MoveList(pos).size()); - return mated_in(ss->ply); // Plies to mate from the root - } - - if (!is_decisive(bestValue) && bestValue > beta) - bestValue = (bestValue + beta) / 2; - - Color us = pos.side_to_move(); - if (!ss->inCheck && !moveCount && !pos.non_pawn_material(us) - && type_of(pos.captured_piece()) >= ROOK) - { - if (!((us == WHITE ? shift(pos.pieces(us, PAWN)) - : shift(pos.pieces(us, PAWN))) - & ~pos.pieces())) // no pawn pushes available - { - pos.state()->checkersBB = Rank1BB; // search for legal king-moves only - if (!MoveList(pos).size()) // stalemate - bestValue = VALUE_DRAW; - pos.state()->checkersBB = 0; - } - } - - // Save gathered info in transposition table. 
The static evaluation - // is saved as it was before adjustment by correction history. - ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), pvHit, - bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, DEPTH_QS, bestMove, - unadjustedStaticEval, tt.generation()); - - assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); - - return bestValue; -} - -Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) const { - int reductionScale = reductions[d] * reductions[mn]; - return reductionScale - delta * 576 / rootDelta + !i * reductionScale * 217 / 512 + 1182; -} - -// elapsed() returns the time elapsed since the search started. If the -// 'nodestime' option is enabled, it will return the count of nodes searched -// instead. This function is called to check whether the search should be -// stopped based on predefined thresholds like time limits or nodes searched. -// -// elapsed_time() returns the actual time elapsed since the start of the search. -// This function is intended for use only when printing PV outputs, and not used -// for making decisions within the search algorithm itself. -TimePoint Search::Worker::elapsed() const { - return main_manager()->tm.elapsed([this]() { return threads.nodes_searched(); }); -} - -TimePoint Search::Worker::elapsed_time() const { return main_manager()->tm.elapsed_time(); } - -Value Search::Worker::evaluate(const Position& pos) { - return Eval::evaluate(networks[numaAccessToken], pos, accumulatorStack, refreshTable, - optimism[pos.side_to_move()]); -} - -namespace { -// Adjusts a mate or TB score from "plies to mate from the root" to -// "plies to mate from the current position". Standard scores are unchanged. -// The function is called before storing a value in the transposition table. -Value value_to_tt(Value v, int ply) { return is_win(v) ? v + ply : is_loss(v) ? 
v - ply : v; } - - -// Inverse of value_to_tt(): it adjusts a mate or TB score from the transposition -// table (which refers to the plies to mate/be mated from current position) to -// "plies to mate/be mated (TB win/loss) from the root". However, to avoid -// potentially false mate or TB scores related to the 50 moves rule and the -// graph history interaction, we return the highest non-TB score instead. -Value value_from_tt(Value v, int ply, int r50c) { - - if (!is_valid(v)) - return VALUE_NONE; - - // handle TB win or better - if (is_win(v)) - { - // Downgrade a potentially false mate score - if (v >= VALUE_MATE_IN_MAX_PLY && VALUE_MATE - v > 100 - r50c) - return VALUE_TB_WIN_IN_MAX_PLY - 1; - - // Downgrade a potentially false TB score. - if (VALUE_TB - v > 100 - r50c) - return VALUE_TB_WIN_IN_MAX_PLY - 1; - - return v - ply; - } - - // handle TB loss or worse - if (is_loss(v)) - { - // Downgrade a potentially false mate score. - if (v <= VALUE_MATED_IN_MAX_PLY && VALUE_MATE + v > 100 - r50c) - return VALUE_TB_LOSS_IN_MAX_PLY + 1; - - // Downgrade a potentially false TB score. 
- if (VALUE_TB + v > 100 - r50c) - return VALUE_TB_LOSS_IN_MAX_PLY + 1; - - return v + ply; - } - - return v; -} - - -// Adds current move and appends child pv[] -void update_pv(Move* pv, Move move, const Move* childPv) { - - for (*pv++ = move; childPv && *childPv != Move::none();) - *pv++ = *childPv++; - *pv = Move::none(); -} - - -// Updates stats at the end of search() when a bestMove is found -void update_all_stats(const Position& pos, - Stack* ss, - Search::Worker& workerThread, - Move bestMove, - Square prevSq, - SearchedList& quietsSearched, - SearchedList& capturesSearched, - Depth depth, - Move ttMove) { - - CapturePieceToHistory& captureHistory = workerThread.captureHistory; - Piece movedPiece = pos.moved_piece(bestMove); - PieceType capturedPiece; - - int bonus = - std::min(124 * depth - 84, 1376) + 349 * (bestMove == ttMove) + (ss - 1)->statScore / 32; - int malus = std::min(872 * depth - 212, 2104); - - if (!pos.capture_stage(bestMove)) - { - update_quiet_histories(pos, ss, workerThread, bestMove, bonus * 810 / 1024); - - int actualMalus = malus * 1159 / 1024; - // Decrease stats for all non-best quiet moves - for (Move move : quietsSearched) - { - actualMalus = actualMalus * 963 / 1024; - update_quiet_histories(pos, ss, workerThread, move, -actualMalus); - } - } - else - { - // Increase stats for the best move in case it was a capture move - capturedPiece = type_of(pos.piece_on(bestMove.to_sq())); - captureHistory[movedPiece][bestMove.to_sq()][capturedPiece] << bonus * 1290 / 1024; - } - - // Extra penalty for a quiet early move that was not a TT move in - // previous ply when it gets refuted. 
- if (prevSq != SQ_NONE && ((ss - 1)->moveCount == 1 + (ss - 1)->ttHit) && !pos.captured_piece()) - update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -malus * 596 / 1024); - - // Decrease stats for all non-best capture moves - for (Move move : capturesSearched) - { - movedPiece = pos.moved_piece(move); - capturedPiece = type_of(pos.piece_on(move.to_sq())); - captureHistory[movedPiece][move.to_sq()][capturedPiece] << -malus * 1561 / 1024; - } -} - - -// Updates histories of the move pairs formed by moves -// at ply -1, -2, -3, -4, and -6 with current move. -void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - static constexpr std::array conthist_bonuses = { - {{1, 1106}, {2, 705}, {3, 316}, {4, 572}, {5, 126}, {6, 427}}}; - - // Multipliers for positive history consistency - constexpr int CMHCMultipliers[] = {87, 94, 106, 118, 114, 128, 128}; - int positiveCount = 0; - - for (const auto [i, weight] : conthist_bonuses) - { - // Only update the first 2 continuation histories if we are in check - if (ss->inCheck && i > 2) - break; - - if (((ss - i)->currentMove).is_ok()) - { - auto& historyEntry = (*(ss - i)->continuationHistory)[pc][to]; - if (historyEntry > 0) - positiveCount++; - - int multiplier = CMHCMultipliers[positiveCount]; - historyEntry << (bonus * weight * multiplier / 131072) + 82 * (i < 2); - } - } -} - -// Updates move sorting heuristics - -void update_quiet_histories( - const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { - - Color us = pos.side_to_move(); - workerThread.mainHistory[us][move.raw()] << bonus; // Untuned to prevent duplicate effort - - if (ss->ply < LOW_PLY_HISTORY_SIZE) - workerThread.lowPlyHistory[ss->ply][move.raw()] << bonus * 714 / 1024; - - update_continuation_histories(ss, pos.moved_piece(move), move.to_sq(), bonus * 898 / 1024); - - workerThread.sharedHistory.pawn_entry(pos)[pos.moved_piece(move)][move.to_sq()] - << bonus * (bonus > 0 ? 
967 : 535) / 1024; -} - -} - -// When playing with strength handicap, choose the best move among a set of -// RootMoves using a statistical rule dependent on 'level'. Idea by Heinz van Saanen. -Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) { - static PRNG rng(now()); // PRNG sequence should be non-deterministic - - // RootMoves are already sorted by score in descending order - Value topScore = rootMoves[0].score; - int delta = std::min(topScore - rootMoves[multiPV - 1].score, int(PawnValue)); - int maxScore = -VALUE_INFINITE; - double weakness = 120 - 2 * level; - - // Choose best move. For each move score we add two terms, both dependent on - // weakness. One is deterministic and bigger for weaker levels, and one is - // random. Then we choose the move with the resulting highest score. - for (size_t i = 0; i < multiPV; ++i) - { - // This is our magic formula - int push = int(weakness * int(topScore - rootMoves[i].score) - + delta * (rng.rand() % int(weakness))) - / 128; - - if (rootMoves[i].score + push >= maxScore) - { - maxScore = rootMoves[i].score + push; - best = rootMoves[i].pv[0]; - } - } - - return best; -} - -// Used to print debug info and, more importantly, to detect -// when we are out of available time and thus stop the search. -void SearchManager::check_time(Search::Worker& worker) { - if (--callsCnt > 0) - return; - - // When using nodes, ensure checking rate is not lower than 0.1% of nodes - callsCnt = worker.limits.nodes ? 
std::min(512, int(worker.limits.nodes / 1024)) : 512; - - static TimePoint lastInfoTime = now(); - - TimePoint elapsed = tm.elapsed([&worker]() { return worker.threads.nodes_searched(); }); - TimePoint tick = worker.limits.startTime + elapsed; - - if (tick - lastInfoTime >= 1000) - { - lastInfoTime = tick; - dbg_print(); - } - - // We should not stop pondering until told so by the GUI - if (ponder) - return; - - if ( - // Later we rely on the fact that we can at least use the mainthread previous - // root-search score and PV in a multithreaded environment to prove mated-in scores. - worker.completedDepth >= 1 - && ((worker.limits.use_time_management() && (elapsed > tm.maximum() || stopOnPonderhit)) - || (worker.limits.movetime && elapsed >= worker.limits.movetime) - || (worker.limits.nodes && worker.threads.nodes_searched() >= worker.limits.nodes))) - worker.threads.stop = true; -} - -// Used to correct and extend PVs for moves that have a TB (but not a mate) score. -// Keeps the search based PV for as long as it is verified to maintain the game -// outcome, truncates afterwards. Finally, extends to mate the PV, providing a -// possible continuation (but not a proven mating line). -void syzygy_extend_pv(const OptionsMap& options, - const Search::LimitsType& limits, - Position& pos, - RootMove& rootMove, - Value& v) { - - auto t_start = std::chrono::steady_clock::now(); - int moveOverhead = int(options["Move Overhead"]); - bool rule50 = bool(options["Syzygy50MoveRule"]); - - // Do not use more than moveOverhead / 2 time, if time management is active - auto time_abort = [&t_start, &moveOverhead, &limits]() -> bool { - auto t_end = std::chrono::steady_clock::now(); - return limits.use_time_management() - && 2 * std::chrono::duration(t_end - t_start).count() - > moveOverhead; - }; - - std::list sts; - - // Step 0, do the rootMove, no correction allowed, as needed for MultiPV in TB. 
- auto& stRoot = sts.emplace_back(); - pos.do_move(rootMove.pv[0], stRoot); - int ply = 1; - - // Step 1, walk the PV to the last position in TB with correct decisive score - while (size_t(ply) < rootMove.pv.size()) - { - Move& pvMove = rootMove.pv[ply]; - - RootMoves legalMoves; - for (const auto& m : MoveList(pos)) - legalMoves.emplace_back(m); - - Tablebases::Config config = - Tablebases::rank_root_moves(options, pos, legalMoves, false, time_abort); - RootMove& rm = *std::find(legalMoves.begin(), legalMoves.end(), pvMove); - - if (legalMoves[0].tbRank != rm.tbRank) - break; - - ply++; - - auto& st = sts.emplace_back(); - pos.do_move(pvMove, st); - - // Do not allow for repetitions or drawing moves along the PV in TB regime - if (config.rootInTB && ((rule50 && pos.is_draw(ply)) || pos.is_repetition(ply))) - { - pos.undo_move(pvMove); - ply--; - break; - } - - // Full PV shown will thus be validated and end in TB. - // If we cannot validate the full PV in time, we do not show it. - if (config.rootInTB && time_abort()) - break; - } - - // Resize the PV to the correct part - rootMove.pv.resize(ply); - - // Step 2, now extend the PV to mate, as if the user explored syzygy-tables.info - // using top ranked moves (minimal DTZ), which gives optimal mates only for simple - // endgames e.g. KRvK. - while (!(rule50 && pos.is_draw(0))) - { - if (time_abort()) - break; - - RootMoves legalMoves; - for (const auto& m : MoveList(pos)) - { - auto& rm = legalMoves.emplace_back(m); - StateInfo tmpSI; - pos.do_move(m, tmpSI); - // Give a score of each move to break DTZ ties restricting opponent mobility, - // but not giving the opponent a capture. - for (const auto& mOpp : MoveList(pos)) - rm.tbRank -= pos.capture(mOpp) ? 100 : 1; - pos.undo_move(m); - } - - // Mate found - if (legalMoves.size() == 0) - break; - - // Sort moves according to their above assigned rank. - // This will break ties for moves with equal DTZ in rank_root_moves. 
- std::stable_sort( - legalMoves.begin(), legalMoves.end(), - [](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; }); - - // The winning side tries to minimize DTZ, the losing side maximizes it - Tablebases::Config config = - Tablebases::rank_root_moves(options, pos, legalMoves, true, time_abort); - - // If DTZ is not available we might not find a mate, so we bail out - if (!config.rootInTB || config.cardinality > 0) - break; - - ply++; - - Move& pvMove = legalMoves[0].pv[0]; - rootMove.pv.push_back(pvMove); - auto& st = sts.emplace_back(); - pos.do_move(pvMove, st); - } - - // Finding a draw in this function is an exceptional case, that cannot happen when rule50 is false or - // during engine game play, since we have a winning score, and play correctly - // with TB support. However, it can be that a position is draw due to the 50 move - // rule if it has been been reached on the board with a non-optimal 50 move counter - // (e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106 ) which TB with dtz counter rounding - // cannot always correctly rank. See also - // https://github.com/official-stockfish/Stockfish/issues/5175#issuecomment-2058893495 - // We adjust the score to match the found PV. Note that a TB loss score can be - // displayed if the engine did not find a drawing move yet, but eventually search - // will figure it out (e.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1 ) - if (pos.is_draw(0)) - v = VALUE_DRAW; - - // Undo the PV moves - for (auto it = rootMove.pv.rbegin(); it != rootMove.pv.rend(); ++it) - pos.undo_move(*it); - - // Inform if we couldn't get a full extension in time - if (time_abort()) - sync_cout - << "info string Syzygy based PV extension requires more time, increase Move Overhead as needed." 
- << sync_endl; -} - -void SearchManager::pv(Search::Worker& worker, - const ThreadPool& threads, - const TranspositionTable& tt, - Depth depth) { - - const auto nodes = threads.nodes_searched(); - auto& rootMoves = worker.rootMoves; - auto& pos = worker.rootPos; - size_t pvIdx = worker.pvIdx; - size_t multiPV = std::min(size_t(worker.options["MultiPV"]), rootMoves.size()); - uint64_t tbHits = threads.tb_hits() + (worker.tbConfig.rootInTB ? rootMoves.size() : 0); - - for (size_t i = 0; i < multiPV; ++i) - { - bool updated = rootMoves[i].score != -VALUE_INFINITE; - - if (depth == 1 && !updated && i > 0) - continue; - - Depth d = updated ? depth : std::max(1, depth - 1); - Value v = updated ? rootMoves[i].uciScore : rootMoves[i].previousScore; - - if (v == -VALUE_INFINITE) - v = VALUE_ZERO; - - bool tb = worker.tbConfig.rootInTB && std::abs(v) <= VALUE_TB; - v = tb ? rootMoves[i].tbScore : v; - - bool isExact = i != pvIdx || tb || !updated; // tablebase- and previous-scores are exact - - // Potentially correct and extend the PV, and in exceptional cases v - if (is_decisive(v) && std::abs(v) < VALUE_MATE_IN_MAX_PLY - && ((!rootMoves[i].scoreLowerbound && !rootMoves[i].scoreUpperbound) || isExact)) - syzygy_extend_pv(worker.options, worker.limits, pos, rootMoves[i], v); - - std::string pv; - for (Move m : rootMoves[i].pv) - pv += UCIEngine::move(m, pos.is_chess960()) + " "; - - // Remove last whitespace - if (!pv.empty()) - pv.pop_back(); - - auto wdl = worker.options["UCI_ShowWDL"] ? UCIEngine::wdl(v, pos) : ""; - auto bound = rootMoves[i].scoreLowerbound - ? "lowerbound" - : (rootMoves[i].scoreUpperbound ? 
"upperbound" : ""); - - InfoFull info; - - info.depth = d; - info.selDepth = rootMoves[i].selDepth; - info.multiPV = i + 1; - info.score = {v, pos}; - info.wdl = wdl; - - if (!isExact) - info.bound = bound; - - TimePoint time = std::max(TimePoint(1), tm.elapsed_time()); - info.timeMs = time; - info.nodes = nodes; - info.nps = nodes * 1000 / time; - info.tbHits = tbHits; - info.pv = pv; - info.hashfull = tt.hashfull(); - - updates.onUpdateFull(info); - } -} - -// Called in case we have no ponder move before exiting the search, -// for instance, in case we stop the search during a fail high at root. -// We try hard to have a ponder move to return to the GUI, -// otherwise in case of 'ponder on' we have nothing to think about. -bool RootMove::extract_ponder_from_tt(const TranspositionTable& tt, Position& pos) { - - StateInfo st; - - assert(pv.size() == 1); - if (pv[0] == Move::none()) - return false; - - pos.do_move(pv[0], st, &tt); - - auto [ttHit, ttData, ttWriter] = tt.probe(pos.key()); - if (ttHit) - { - if (MoveList(pos).contains(ttData.move)) - pv.push_back(ttData.move); - } - - pos.undo_move(pv[0]); - return pv.size() > 1; -} - - -} // namespace Stockfish diff --git a/src/search.h b/src/search.h deleted file mode 100644 index 202f7c8db2e6002353c4c691de9f6d8db4ee5957..0000000000000000000000000000000000000000 --- a/src/search.h +++ /dev/null @@ -1,379 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef SEARCH_H_INCLUDED -#define SEARCH_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "history.h" -#include "misc.h" -#include "nnue/network.h" -#include "nnue/nnue_accumulator.h" -#include "numa.h" -#include "position.h" -#include "score.h" -#include "syzygy/tbprobe.h" -#include "timeman.h" -#include "types.h" - -namespace Stockfish { - -// Different node types, used as a template parameter -enum NodeType { - NonPV, - PV, - Root -}; - -class TranspositionTable; -class ThreadPool; -class OptionsMap; - -namespace Search { - -// Stack struct keeps track of the information we need to remember from nodes -// shallower and deeper in the tree during the search. Each search thread has -// its own array of Stack objects, indexed by the current ply. -struct Stack { - Move* pv; - PieceToHistory* continuationHistory; - CorrectionHistory* continuationCorrectionHistory; - int ply; - Move currentMove; - Move excludedMove; - Value staticEval; - int statScore; - int moveCount; - bool inCheck; - bool ttPv; - bool ttHit; - int cutoffCnt; - int reduction; -}; - - -// RootMove struct is used for moves at the root of the tree. For each root move -// we store a score and a PV (really a refutation in the case of moves which -// fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves. -struct RootMove { - - explicit RootMove(Move m) : - pv(1, m) {} - bool extract_ponder_from_tt(const TranspositionTable& tt, Position& pos); - bool operator==(const Move& m) const { return pv[0] == m; } - // Sort in descending order - bool operator<(const RootMove& m) const { - return m.score != score ? 
m.score < score : m.previousScore < previousScore; - } - - uint64_t effort = 0; - Value score = -VALUE_INFINITE; - Value previousScore = -VALUE_INFINITE; - Value averageScore = -VALUE_INFINITE; - Value meanSquaredScore = -VALUE_INFINITE * VALUE_INFINITE; - Value uciScore = -VALUE_INFINITE; - bool scoreLowerbound = false; - bool scoreUpperbound = false; - int selDepth = 0; - int tbRank = 0; - Value tbScore; - std::vector pv; -}; - -using RootMoves = std::vector; - - -// LimitsType struct stores information sent by the caller about the analysis required. -struct LimitsType { - - // Init explicitly due to broken value-initialization of non POD in MSVC - LimitsType() { - time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0); - movestogo = depth = mate = perft = infinite = 0; - nodes = 0; - ponderMode = false; - } - - bool use_time_management() const { return time[WHITE] || time[BLACK]; } - - std::vector searchmoves; - TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime; - int movestogo, depth, mate, perft, infinite; - uint64_t nodes; - bool ponderMode; -}; - - -// The UCI stores the uci options, thread pool, and transposition table. -// This struct is used to easily forward data to the Search::Worker class. -struct SharedState { - SharedState(const OptionsMap& optionsMap, - ThreadPool& threadPool, - TranspositionTable& transpositionTable, - std::map& sharedHists, - const LazyNumaReplicatedSystemWide& nets) : - options(optionsMap), - threads(threadPool), - tt(transpositionTable), - sharedHistories(sharedHists), - networks(nets) {} - - const OptionsMap& options; - ThreadPool& threads; - TranspositionTable& tt; - std::map& sharedHistories; - const LazyNumaReplicatedSystemWide& networks; -}; - -class Worker; - -// Null Object Pattern, implement a common interface for the SearchManagers. -// A Null Object will be given to non-mainthread workers. 
-class ISearchManager { - public: - virtual ~ISearchManager() {} - virtual void check_time(Search::Worker&) = 0; -}; - -struct InfoShort { - int depth; - Score score; -}; - -struct InfoFull: InfoShort { - int selDepth; - size_t multiPV; - std::string_view wdl; - std::string_view bound; - size_t timeMs; - size_t nodes; - size_t nps; - size_t tbHits; - std::string_view pv; - int hashfull; -}; - -struct InfoIteration { - int depth; - std::string_view currmove; - size_t currmovenumber; -}; - -// Skill structure is used to implement strength limit. If we have a UCI_Elo, -// we convert it to an appropriate skill level, anchored to the Stash engine. -// This method is based on a fit of the Elo results for games played between -// Stockfish at various skill levels and various versions of the Stash engine. -// Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately -// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2 -struct Skill { - // Lowest and highest Elo ratings used in the skill level calculation - constexpr static int LowestElo = 1320; - constexpr static int HighestElo = 3190; - - Skill(int skill_level, int uci_elo) { - if (uci_elo) - { - double e = double(uci_elo - LowestElo) / (HighestElo - LowestElo); - level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0); - } - else - level = double(skill_level); - } - bool enabled() const { return level < 20.0; } - bool time_to_pick(Depth depth) const { return depth == 1 + int(level); } - Move pick_best(const RootMoves&, size_t multiPV); - - double level; - Move best = Move::none(); -}; - -// SearchManager manages the search from the main thread. It is responsible for -// keeping track of the time, and storing data strictly related to the main thread. 
-class SearchManager: public ISearchManager { - public: - using UpdateShort = std::function; - using UpdateFull = std::function; - using UpdateIter = std::function; - using UpdateBestmove = std::function; - - struct UpdateContext { - UpdateShort onUpdateNoMoves; - UpdateFull onUpdateFull; - UpdateIter onIter; - UpdateBestmove onBestmove; - }; - - - SearchManager(const UpdateContext& updateContext) : - updates(updateContext) {} - - void check_time(Search::Worker& worker) override; - - void pv(Search::Worker& worker, - const ThreadPool& threads, - const TranspositionTable& tt, - Depth depth); - - Stockfish::TimeManagement tm; - double originalTimeAdjust; - int callsCnt; - std::atomic_bool ponder; - - std::array iterValue; - double previousTimeReduction; - Value bestPreviousScore; - Value bestPreviousAverageScore; - bool stopOnPonderhit; - - size_t id; - - const UpdateContext& updates; -}; - -class NullSearchManager: public ISearchManager { - public: - void check_time(Search::Worker&) override {} -}; - -// Search::Worker is the class that does the actual search. -// It is instantiated once per thread, and it is responsible for keeping track -// of the search history, and storing data required for the search. -class Worker { - public: - Worker(SharedState&, - std::unique_ptr, - size_t, - size_t, - size_t, - NumaReplicatedAccessToken); - - // Called at instantiation to initialize reductions tables. - // Reset histories, usually before a new game. - void clear(); - - // Called when the program receives the UCI 'go' command. - // It searches from the root position and outputs the "bestmove". 
- void start_searching(); - - bool is_mainthread() const { return threadIdx == 0; } - - void ensure_network_replicated(); - - // Public because they need to be updatable by the stats - ButterflyHistory mainHistory; - LowPlyHistory lowPlyHistory; - - CapturePieceToHistory captureHistory; - ContinuationHistory continuationHistory[2][2]; - CorrectionHistory continuationCorrectionHistory; - - TTMoveHistory ttMoveHistory; - SharedHistories& sharedHistory; - - private: - void iterative_deepening(); - - void do_move(Position& pos, const Move move, StateInfo& st, Stack* const ss); - void - do_move(Position& pos, const Move move, StateInfo& st, const bool givesCheck, Stack* const ss); - void do_null_move(Position& pos, StateInfo& st, Stack* const ss); - void undo_move(Position& pos, const Move move); - void undo_null_move(Position& pos); - - // This is the main search function, for both PV and non-PV nodes - template - Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); - - // Quiescence search function, which is called by the main search - template - Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta); - - Depth reduction(bool i, Depth d, int mn, int delta) const; - - // Pointer to the search manager, only allowed to be called by the main thread - SearchManager* main_manager() const { - assert(threadIdx == 0); - return static_cast(manager.get()); - } - - TimePoint elapsed() const; - TimePoint elapsed_time() const; - - Value evaluate(const Position&); - - LimitsType limits; - - size_t pvIdx, pvLast; - std::atomic nodes, tbHits, bestMoveChanges; - int selDepth, nmpMinPly; - - Value optimism[COLOR_NB]; - - Position rootPos; - StateInfo rootState; - RootMoves rootMoves; - Depth rootDepth, completedDepth; - Value rootDelta; - - size_t threadIdx, numaThreadIdx, numaTotal; - NumaReplicatedAccessToken numaAccessToken; - - // Reductions lookup table initialized at startup - std::array reductions; // [depth or moveNumber] - - // 
The main thread has a SearchManager, the others have a NullSearchManager - std::unique_ptr manager; - - Tablebases::Config tbConfig; - - const OptionsMap& options; - ThreadPool& threads; - TranspositionTable& tt; - const LazyNumaReplicatedSystemWide& networks; - - // Used by NNUE - Eval::NNUE::AccumulatorStack accumulatorStack; - Eval::NNUE::AccumulatorCaches refreshTable; - - friend class Stockfish::ThreadPool; - friend class SearchManager; -}; - -struct ConthistBonus { - int index; - int weight; -}; - - -} // namespace Search - -} // namespace Stockfish - -#endif // #ifndef SEARCH_H_INCLUDED diff --git a/src/shm.h b/src/shm.h deleted file mode 100644 index d581bf08ac7f91b9975bfd4c9cd1f80c8aa0dd9b..0000000000000000000000000000000000000000 --- a/src/shm.h +++ /dev/null @@ -1,634 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef SHM_H_INCLUDED -#define SHM_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(__linux__) && !defined(__ANDROID__) - #include "shm_linux.h" -#endif - -#if defined(__ANDROID__) - #include - #define SF_MAX_SEM_NAME_LEN NAME_MAX -#endif - -#include "types.h" - -#include "memory.h" - -#if defined(_WIN32) - - #if _WIN32_WINNT < 0x0601 - #undef _WIN32_WINNT - #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes - #endif - - #if !defined(NOMINMAX) - #define NOMINMAX - #endif - #include -#elif defined(__linux__) - #include - #include - #include - #include - #include - #include - #include -#endif - - -#if defined(__APPLE__) - #include - #include - -#elif defined(__sun) - #include - -#elif defined(__FreeBSD__) - #include - #include - #include - -#elif defined(__NetBSD__) || defined(__DragonFly__) || defined(__linux__) - #include - #include -#endif - - -namespace Stockfish { - -// argv[0] CANNOT be used because we need to identify the executable. -// argv[0] contains the command used to invoke it, which does not involve the full path. -// Just using a path is not fully resilient either, as the executable could -// have changed if it wasn't locked by the OS. Ideally we would hash the executable -// but it's not really that important at this point. -// If the path is longer than 4095 bytes the hash will be computed from an unspecified -// amount of bytes of the path; in particular it can a hash of an empty string. 
- -inline std::string getExecutablePathHash() { - char executable_path[4096] = {0}; - std::size_t path_length = 0; - -#if defined(_WIN32) - path_length = GetModuleFileNameA(NULL, executable_path, sizeof(executable_path)); - -#elif defined(__APPLE__) - uint32_t size = sizeof(executable_path); - if (_NSGetExecutablePath(executable_path, &size) == 0) - { - path_length = std::strlen(executable_path); - } - -#elif defined(__sun) // Solaris - const char* path = getexecname(); - if (path) - { - std::strncpy(executable_path, path, sizeof(executable_path) - 1); - path_length = std::strlen(executable_path); - } - -#elif defined(__FreeBSD__) - size_t size = sizeof(executable_path); - int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; - if (sysctl(mib, 4, executable_path, &size, NULL, 0) == 0) - { - path_length = std::strlen(executable_path); - } - -#elif defined(__NetBSD__) || defined(__DragonFly__) - ssize_t len = readlink("/proc/curproc/exe", executable_path, sizeof(executable_path) - 1); - if (len >= 0) - { - executable_path[len] = '\0'; - path_length = len; - } - -#elif defined(__linux__) - ssize_t len = readlink("/proc/self/exe", executable_path, sizeof(executable_path) - 1); - if (len >= 0) - { - executable_path[len] = '\0'; - path_length = len; - } - -#endif - - // In case of any error the path will be empty. - return std::string(executable_path, path_length); -} - -enum class SystemWideSharedConstantAllocationStatus { - NoAllocation, - LocalMemory, - SharedMemory -}; - -#if defined(_WIN32) - -inline std::string GetLastErrorAsString(DWORD error) { - //Get the error message ID, if any. - DWORD errorMessageID = error; - if (errorMessageID == 0) - { - return std::string(); //No error message has been recorded - } - - LPSTR messageBuffer = nullptr; - - //Ask Win32 to give us the string version of that message ID. 
- //The parameters we pass in, tell Win32 to create the buffer that holds the message for us (because we don't yet know how long the message string will be). - size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM - | FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, errorMessageID, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPSTR) &messageBuffer, 0, NULL); - - //Copy the error message into a std::string. - std::string message(messageBuffer, size); - - //Free the Win32's string's buffer. - LocalFree(messageBuffer); - - return message; -} - -// Utilizes shared memory to store the value. It is deduplicated system-wide (for the single user). -template -class SharedMemoryBackend { - public: - enum class Status { - Success, - LargePageAllocationError, - FileMappingError, - MapViewError, - MutexCreateError, - MutexWaitError, - MutexReleaseError, - NotInitialized - }; - - static constexpr DWORD IS_INITIALIZED_VALUE = 1; - - SharedMemoryBackend() : - status(Status::NotInitialized) {}; - - SharedMemoryBackend(const std::string& shm_name, const T& value) : - status(Status::NotInitialized) { - - initialize(shm_name, value); - } - - bool is_valid() const { return status == Status::Success; } - - std::optional get_error_message() const { - switch (status) - { - case Status::Success : - return std::nullopt; - case Status::LargePageAllocationError : - return "Failed to allocate large page memory"; - case Status::FileMappingError : - return "Failed to create file mapping: " + last_error_message; - case Status::MapViewError : - return "Failed to map view: " + last_error_message; - case Status::MutexCreateError : - return "Failed to create mutex: " + last_error_message; - case Status::MutexWaitError : - return "Failed to wait on mutex: " + last_error_message; - case Status::MutexReleaseError : - return "Failed to release mutex: " + last_error_message; - case Status::NotInitialized : - return "Not initialized"; - default : - return "Unknown error"; - } - } 
- - void* get() const { return is_valid() ? pMap : nullptr; } - - ~SharedMemoryBackend() { cleanup(); } - - SharedMemoryBackend(const SharedMemoryBackend&) = delete; - SharedMemoryBackend& operator=(const SharedMemoryBackend&) = delete; - - SharedMemoryBackend(SharedMemoryBackend&& other) noexcept : - pMap(other.pMap), - hMapFile(other.hMapFile), - status(other.status), - last_error_message(std::move(other.last_error_message)) { - - other.pMap = nullptr; - other.hMapFile = 0; - other.status = Status::NotInitialized; - } - - SharedMemoryBackend& operator=(SharedMemoryBackend&& other) noexcept { - if (this != &other) - { - cleanup(); - pMap = other.pMap; - hMapFile = other.hMapFile; - status = other.status; - last_error_message = std::move(other.last_error_message); - - other.pMap = nullptr; - other.hMapFile = 0; - other.status = Status::NotInitialized; - } - return *this; - } - - SystemWideSharedConstantAllocationStatus get_status() const { - return status == Status::Success ? SystemWideSharedConstantAllocationStatus::SharedMemory - : SystemWideSharedConstantAllocationStatus::NoAllocation; - } - - private: - void initialize(const std::string& shm_name, const T& value) { - const size_t total_size = sizeof(T) + sizeof(IS_INITIALIZED_VALUE); - - // Try allocating with large pages first. - hMapFile = windows_try_with_large_page_priviliges( - [&](size_t largePageSize) { - const size_t total_size_aligned = - (total_size + largePageSize - 1) / largePageSize * largePageSize; - - #if defined(_WIN64) - DWORD total_size_low = total_size_aligned & 0xFFFFFFFFu; - DWORD total_size_high = total_size_aligned >> 32u; - #else - DWORD total_size_low = total_size_aligned; - DWORD total_size_high = 0; - #endif - - return CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, - PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES, - total_size_high, total_size_low, shm_name.c_str()); - }, - []() { return (void*) nullptr; }); - - // Fallback to normal allocation if no large pages available. 
- if (!hMapFile) - { - hMapFile = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, - static_cast(total_size), shm_name.c_str()); - } - - if (!hMapFile) - { - const DWORD err = GetLastError(); - last_error_message = GetLastErrorAsString(err); - status = Status::FileMappingError; - return; - } - - pMap = MapViewOfFile(hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, total_size); - if (!pMap) - { - const DWORD err = GetLastError(); - last_error_message = GetLastErrorAsString(err); - status = Status::MapViewError; - cleanup_partial(); - return; - } - - // Use named mutex to ensure only one initializer - std::string mutex_name = shm_name + "$mutex"; - HANDLE hMutex = CreateMutexA(NULL, FALSE, mutex_name.c_str()); - if (!hMutex) - { - const DWORD err = GetLastError(); - last_error_message = GetLastErrorAsString(err); - status = Status::MutexCreateError; - cleanup_partial(); - return; - } - - DWORD wait_result = WaitForSingleObject(hMutex, INFINITE); - if (wait_result != WAIT_OBJECT_0) - { - const DWORD err = GetLastError(); - last_error_message = GetLastErrorAsString(err); - status = Status::MutexWaitError; - CloseHandle(hMutex); - cleanup_partial(); - return; - } - - // Crucially, we place the object first to ensure alignment. 
- volatile DWORD* is_initialized = - std::launder(reinterpret_cast(reinterpret_cast(pMap) + sizeof(T))); - T* object = std::launder(reinterpret_cast(pMap)); - - if (*is_initialized != IS_INITIALIZED_VALUE) - { - // First time initialization, message for debug purposes - new (object) T{value}; - *is_initialized = IS_INITIALIZED_VALUE; - } - - BOOL release_result = ReleaseMutex(hMutex); - CloseHandle(hMutex); - - if (!release_result) - { - const DWORD err = GetLastError(); - last_error_message = GetLastErrorAsString(err); - status = Status::MutexReleaseError; - cleanup_partial(); - return; - } - - status = Status::Success; - } - - void cleanup_partial() { - if (pMap != nullptr) - { - UnmapViewOfFile(pMap); - pMap = nullptr; - } - if (hMapFile) - { - CloseHandle(hMapFile); - hMapFile = 0; - } - } - - void cleanup() { - if (pMap != nullptr) - { - UnmapViewOfFile(pMap); - pMap = nullptr; - } - if (hMapFile) - { - CloseHandle(hMapFile); - hMapFile = 0; - } - } - - void* pMap = nullptr; - HANDLE hMapFile = 0; - Status status = Status::NotInitialized; - std::string last_error_message; -}; - -#elif defined(__linux__) && !defined(__ANDROID__) - -template -class SharedMemoryBackend { - public: - SharedMemoryBackend() = default; - - SharedMemoryBackend(const std::string& shm_name, const T& value) : - shm1(shm::create_shared(shm_name, value)) {} - - void* get() const { - const T* ptr = &shm1->get(); - return reinterpret_cast(const_cast(ptr)); - } - - bool is_valid() const { return shm1 && shm1->is_open() && shm1->is_initialized(); } - - SystemWideSharedConstantAllocationStatus get_status() const { - return is_valid() ? 
SystemWideSharedConstantAllocationStatus::SharedMemory - : SystemWideSharedConstantAllocationStatus::NoAllocation; - } - - std::optional get_error_message() const { - if (!shm1) - return "Shared memory not initialized"; - - if (!shm1->is_open()) - return "Shared memory is not open"; - - if (!shm1->is_initialized()) - return "Not initialized"; - - return std::nullopt; - } - - private: - std::optional> shm1; -}; - -#else - -// For systems that don't have shared memory, or support is troublesome. -// The way fallback is done is that we need a dummy backend. - -template -class SharedMemoryBackend { - public: - SharedMemoryBackend() = default; - - SharedMemoryBackend([[maybe_unused]] const std::string& shm_name, - [[maybe_unused]] const T& value) {} - - void* get() const { return nullptr; } - - bool is_valid() const { return false; } - - SystemWideSharedConstantAllocationStatus get_status() const { - return SystemWideSharedConstantAllocationStatus::NoAllocation; - } - - std::optional get_error_message() const { return "Dummy SharedMemoryBackend"; } -}; - -#endif - -template -struct SharedMemoryBackendFallback { - SharedMemoryBackendFallback() = default; - - SharedMemoryBackendFallback(const std::string&, const T& value) : - fallback_object(make_unique_large_page(value)) {} - - void* get() const { return fallback_object.get(); } - - SharedMemoryBackendFallback(const SharedMemoryBackendFallback&) = delete; - SharedMemoryBackendFallback& operator=(const SharedMemoryBackendFallback&) = delete; - - SharedMemoryBackendFallback(SharedMemoryBackendFallback&& other) noexcept : - fallback_object(std::move(other.fallback_object)) {} - - SharedMemoryBackendFallback& operator=(SharedMemoryBackendFallback&& other) noexcept { - fallback_object = std::move(other.fallback_object); - return *this; - } - - SystemWideSharedConstantAllocationStatus get_status() const { - return fallback_object == nullptr ? 
SystemWideSharedConstantAllocationStatus::NoAllocation - : SystemWideSharedConstantAllocationStatus::LocalMemory; - } - - std::optional get_error_message() const { - if (fallback_object == nullptr) - return "Not initialized"; - - return "Shared memory not supported by the OS. Local allocation fallback."; - } - - private: - LargePagePtr fallback_object; -}; - -// Platform-independent wrapper -template -struct SystemWideSharedConstant { - private: - static std::string createHashString(const std::string& input) { - char buf[1024]; - std::snprintf(buf, sizeof(buf), "%016" PRIx64, hash_string(input)); - return buf; - } - - public: - // We can't run the destructor because it may be in a completely different process. - // The object stored must also be obviously in-line but we can't check for that, other than some basic checks that cover most cases. - static_assert(std::is_trivially_destructible_v); - static_assert(std::is_trivially_move_constructible_v); - static_assert(std::is_trivially_copy_constructible_v); - - SystemWideSharedConstant() = default; - - - // Content is addressed by its hash. An additional discriminator can be added to account for differences - // that are not present in the content, for example NUMA node allocation. 
- SystemWideSharedConstant(const T& value, std::size_t discriminator = 0) { - std::size_t content_hash = std::hash{}(value); - std::size_t executable_hash = hash_string(getExecutablePathHash()); - - char buf[1024]; - std::snprintf(buf, sizeof(buf), "Local\\sf_%zu$%zu$%zu", content_hash, executable_hash, - discriminator); - std::string shm_name = buf; - -#if defined(__linux__) && !defined(__ANDROID__) - // POSIX shared memory names must start with a slash - shm_name = "/sf_" + createHashString(shm_name); - - // hash name and make sure it is not longer than SF_MAX_SEM_NAME_LEN - if (shm_name.size() > SF_MAX_SEM_NAME_LEN) - { - shm_name = shm_name.substr(0, SF_MAX_SEM_NAME_LEN - 1); - } -#endif - - SharedMemoryBackend shm_backend(shm_name, value); - - if (shm_backend.is_valid()) - { - backend = std::move(shm_backend); - } - else - { - backend = SharedMemoryBackendFallback(shm_name, value); - } - } - - SystemWideSharedConstant(const SystemWideSharedConstant&) = delete; - SystemWideSharedConstant& operator=(const SystemWideSharedConstant&) = delete; - - SystemWideSharedConstant(SystemWideSharedConstant&& other) noexcept : - backend(std::move(other.backend)) {} - - SystemWideSharedConstant& operator=(SystemWideSharedConstant&& other) noexcept { - backend = std::move(other.backend); - return *this; - } - - const T& operator*() const { return *std::launder(reinterpret_cast(get_ptr())); } - - bool operator==(std::nullptr_t) const noexcept { return get_ptr() == nullptr; } - - bool operator!=(std::nullptr_t) const noexcept { return get_ptr() != nullptr; } - - SystemWideSharedConstantAllocationStatus get_status() const { - return std::visit( - [](const auto& end) -> SystemWideSharedConstantAllocationStatus { - if constexpr (std::is_same_v, std::monostate>) - { - return SystemWideSharedConstantAllocationStatus::NoAllocation; - } - else - { - return end.get_status(); - } - }, - backend); - } - - std::optional get_error_message() const { - return std::visit( - [](const auto& end) 
-> std::optional { - if constexpr (std::is_same_v, std::monostate>) - { - return std::nullopt; - } - else - { - return end.get_error_message(); - } - }, - backend); - } - - private: - auto get_ptr() const { - return std::visit( - [](const auto& end) -> void* { - if constexpr (std::is_same_v, std::monostate>) - { - return nullptr; - } - else - { - return end.get(); - } - }, - backend); - } - - std::variant, SharedMemoryBackendFallback> backend; -}; - - -} // namespace Stockfish - -#endif // #ifndef SHM_H_INCLUDED diff --git a/src/shm_linux.h b/src/shm_linux.h deleted file mode 100644 index 29c9e90f5aa81c2b37b763da80051232eb92ffa4..0000000000000000000000000000000000000000 --- a/src/shm_linux.h +++ /dev/null @@ -1,672 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef SHM_LINUX_H_INCLUDED -#define SHM_LINUX_H_INCLUDED - -#if !defined(__linux__) || defined(__ANDROID__) - #error shm_linux.h should not be included on this platform. 
-#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#define SF_MAX_SEM_NAME_LEN NAME_MAX - -#include "misc.h" - -namespace Stockfish::shm { - -namespace detail { - -struct ShmHeader { - static constexpr uint32_t SHM_MAGIC = 0xAD5F1A12; - pthread_mutex_t mutex; - std::atomic ref_count{0}; - std::atomic initialized{false}; - uint32_t magic = SHM_MAGIC; -}; - -class SharedMemoryBase { - public: - virtual ~SharedMemoryBase() = default; - virtual void close(bool skip_unmap = false) noexcept = 0; - virtual const std::string& name() const noexcept = 0; -}; - -class SharedMemoryRegistry { - private: - static std::mutex registry_mutex_; - static std::vector active_instances_; - - public: - static void register_instance(SharedMemoryBase* instance) { - std::scoped_lock lock(registry_mutex_); - active_instances_.push_back(instance); - } - - static void unregister_instance(SharedMemoryBase* instance) { - std::scoped_lock lock(registry_mutex_); - active_instances_.erase( - std::remove(active_instances_.begin(), active_instances_.end(), instance), - active_instances_.end()); - } - - static void cleanup_all(bool skip_unmap = false) noexcept { - std::scoped_lock lock(registry_mutex_); - for (auto* instance : active_instances_) - instance->close(skip_unmap); - active_instances_.clear(); - } -}; - -inline std::mutex SharedMemoryRegistry::registry_mutex_; -inline std::vector SharedMemoryRegistry::active_instances_; - -class CleanupHooks { - private: - static std::once_flag register_once_; - - static void handle_signal(int sig) noexcept { - // Search threads may still be running, so skip munmap (but still perform - // other cleanup actions). The memory mappings will be released on exit. 
- SharedMemoryRegistry::cleanup_all(true); - - // Invoke the default handler, which will exit - struct sigaction sa; - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - sa.sa_flags = 0; - if (sigaction(sig, &sa, nullptr) == -1) - _Exit(128 + sig); - - raise(sig); - } - - static void register_signal_handlers() noexcept { - std::atexit([]() { SharedMemoryRegistry::cleanup_all(true); }); - - constexpr int signals[] = {SIGHUP, SIGINT, SIGQUIT, SIGILL, SIGABRT, SIGFPE, - SIGSEGV, SIGTERM, SIGBUS, SIGSYS, SIGXCPU, SIGXFSZ}; - - struct sigaction sa; - sa.sa_handler = handle_signal; - sigemptyset(&sa.sa_mask); - sa.sa_flags = 0; - - for (int sig : signals) - sigaction(sig, &sa, nullptr); - } - - public: - static void ensure_registered() noexcept { - std::call_once(register_once_, register_signal_handlers); - } -}; - -inline std::once_flag CleanupHooks::register_once_; - - -inline int portable_fallocate(int fd, off_t offset, off_t length) { -#ifdef __APPLE__ - fstore_t store = {F_ALLOCATECONTIG, F_PEOFPOSMODE, offset, length, 0}; - int ret = fcntl(fd, F_PREALLOCATE, &store); - if (ret == -1) - { - store.fst_flags = F_ALLOCATEALL; - ret = fcntl(fd, F_PREALLOCATE, &store); - } - if (ret != -1) - ret = ftruncate(fd, offset + length); - return ret; -#else - return posix_fallocate(fd, offset, length); -#endif -} - -} // namespace detail - -template -class SharedMemory: public detail::SharedMemoryBase { - static_assert(std::is_trivially_copyable_v, "T must be trivially copyable"); - static_assert(!std::is_pointer_v, "T cannot be a pointer type"); - - private: - std::string name_; - int fd_ = -1; - void* mapped_ptr_ = nullptr; - T* data_ptr_ = nullptr; - detail::ShmHeader* header_ptr_ = nullptr; - size_t total_size_ = 0; - std::string sentinel_base_; - std::string sentinel_path_; - - static constexpr size_t calculate_total_size() noexcept { - return sizeof(T) + sizeof(detail::ShmHeader); - } - - static std::string make_sentinel_base(const std::string& name) { - char buf[32]; 
- // Using std::to_string here causes non-deterministic PGO builds. - // snprintf, being part of libc, is insensitive to the formatted values. - std::snprintf(buf, sizeof(buf), "sfshm_%016" PRIu64, hash_string(name)); - return buf; - } - - public: - explicit SharedMemory(const std::string& name) noexcept : - name_(name), - total_size_(calculate_total_size()), - sentinel_base_(make_sentinel_base(name)) {} - - ~SharedMemory() noexcept override { - detail::SharedMemoryRegistry::unregister_instance(this); - close(); - } - - SharedMemory(const SharedMemory&) = delete; - SharedMemory& operator=(const SharedMemory&) = delete; - - SharedMemory(SharedMemory&& other) noexcept : - name_(std::move(other.name_)), - fd_(other.fd_), - mapped_ptr_(other.mapped_ptr_), - data_ptr_(other.data_ptr_), - header_ptr_(other.header_ptr_), - total_size_(other.total_size_), - sentinel_base_(std::move(other.sentinel_base_)), - sentinel_path_(std::move(other.sentinel_path_)) { - - detail::SharedMemoryRegistry::unregister_instance(&other); - detail::SharedMemoryRegistry::register_instance(this); - other.reset(); - } - - SharedMemory& operator=(SharedMemory&& other) noexcept { - if (this != &other) - { - detail::SharedMemoryRegistry::unregister_instance(this); - close(); - - name_ = std::move(other.name_); - fd_ = other.fd_; - mapped_ptr_ = other.mapped_ptr_; - data_ptr_ = other.data_ptr_; - header_ptr_ = other.header_ptr_; - total_size_ = other.total_size_; - sentinel_base_ = std::move(other.sentinel_base_); - sentinel_path_ = std::move(other.sentinel_path_); - - detail::SharedMemoryRegistry::unregister_instance(&other); - detail::SharedMemoryRegistry::register_instance(this); - - other.reset(); - } - return *this; - } - - [[nodiscard]] bool open(const T& initial_value) noexcept { - detail::CleanupHooks::ensure_registered(); - - bool retried_stale = false; - - while (true) - { - if (is_open()) - return false; - - bool created_new = false; - fd_ = shm_open(name_.c_str(), O_CREAT | O_EXCL | 
O_RDWR, 0666); - - if (fd_ == -1) - { - fd_ = shm_open(name_.c_str(), O_RDWR, 0666); - if (fd_ == -1) - return false; - } - else - created_new = true; - - if (!lock_file(LOCK_EX)) - { - ::close(fd_); - reset(); - return false; - } - - bool invalid_header = false; - bool success = - created_new ? setup_new_region(initial_value) : setup_existing_region(invalid_header); - - if (!success) - { - if (created_new || invalid_header) - shm_unlink(name_.c_str()); - if (mapped_ptr_) - unmap_region(); - unlock_file(); - ::close(fd_); - reset(); - - if (!created_new && invalid_header && !retried_stale) - { - retried_stale = true; - continue; - } - return false; - } - - if (!lock_shared_mutex()) - { - if (created_new) - shm_unlink(name_.c_str()); - if (mapped_ptr_) - unmap_region(); - unlock_file(); - ::close(fd_); - reset(); - - if (!created_new && !retried_stale) - { - retried_stale = true; - continue; - } - return false; - } - - if (!create_sentinel_file_locked()) - { - unlock_shared_mutex(); - unmap_region(); - if (created_new) - shm_unlink(name_.c_str()); - unlock_file(); - ::close(fd_); - reset(); - return false; - } - - header_ptr_->ref_count.fetch_add(1, std::memory_order_acq_rel); - - unlock_shared_mutex(); - unlock_file(); - detail::SharedMemoryRegistry::register_instance(this); - return true; - } - } - - void close(bool skip_unmap = false) noexcept override { - if (fd_ == -1 && mapped_ptr_ == nullptr) - return; - - bool remove_region = false; - bool file_locked = lock_file(LOCK_EX); - bool mutex_locked = false; - - if (file_locked && header_ptr_ != nullptr) - mutex_locked = lock_shared_mutex(); - - if (mutex_locked) - { - if (header_ptr_) - { - header_ptr_->ref_count.fetch_sub(1, std::memory_order_acq_rel); - } - remove_sentinel_file(); - remove_region = !has_other_live_sentinels_locked(); - unlock_shared_mutex(); - } - else - { - remove_sentinel_file(); - decrement_refcount_relaxed(); - } - - if (skip_unmap) - mapped_ptr_ = nullptr; - else - unmap_region(); - - if 
(remove_region) - shm_unlink(name_.c_str()); - - if (file_locked) - unlock_file(); - - if (fd_ != -1) - { - ::close(fd_); - fd_ = -1; - } - - if (!skip_unmap) - reset(); - } - - const std::string& name() const noexcept override { return name_; } - - [[nodiscard]] bool is_open() const noexcept { return fd_ != -1 && mapped_ptr_ && data_ptr_; } - - [[nodiscard]] const T& get() const noexcept { return *data_ptr_; } - - [[nodiscard]] const T* operator->() const noexcept { return data_ptr_; } - - [[nodiscard]] const T& operator*() const noexcept { return *data_ptr_; } - - [[nodiscard]] uint32_t ref_count() const noexcept { - return header_ptr_ ? header_ptr_->ref_count.load(std::memory_order_acquire) : 0; - } - - [[nodiscard]] bool is_initialized() const noexcept { - return header_ptr_ ? header_ptr_->initialized.load(std::memory_order_acquire) : false; - } - - static void cleanup_all_instances() noexcept { detail::SharedMemoryRegistry::cleanup_all(); } - - private: - void reset() noexcept { - fd_ = -1; - mapped_ptr_ = nullptr; - data_ptr_ = nullptr; - header_ptr_ = nullptr; - sentinel_path_.clear(); - } - - void unmap_region() noexcept { - if (mapped_ptr_) - { - munmap(mapped_ptr_, total_size_); - mapped_ptr_ = nullptr; - data_ptr_ = nullptr; - header_ptr_ = nullptr; - } - } - - [[nodiscard]] bool lock_file(int operation) noexcept { - if (fd_ == -1) - return false; - - while (flock(fd_, operation) == -1) - { - if (errno == EINTR) - continue; - return false; - } - return true; - } - - void unlock_file() noexcept { - if (fd_ == -1) - return; - - while (flock(fd_, LOCK_UN) == -1) - { - if (errno == EINTR) - continue; - break; - } - } - - std::string sentinel_full_path(pid_t pid) const { - char buf[1024]; - // See above snprintf comment - std::snprintf(buf, sizeof(buf), "/dev/shm/%s.%ld", sentinel_base_.c_str(), long(pid)); - return buf; - } - - void decrement_refcount_relaxed() noexcept { - if (!header_ptr_) - return; - - uint32_t expected = 
header_ptr_->ref_count.load(std::memory_order_relaxed); - while (expected != 0 - && !header_ptr_->ref_count.compare_exchange_weak( - expected, expected - 1, std::memory_order_acq_rel, std::memory_order_relaxed)) - {} - } - - bool create_sentinel_file_locked() noexcept { - if (!header_ptr_) - return false; - - const pid_t self_pid = getpid(); - sentinel_path_ = sentinel_full_path(self_pid); - - for (int attempt = 0; attempt < 2; ++attempt) - { - int fd = ::open(sentinel_path_.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0600); - if (fd != -1) - { - ::close(fd); - return true; - } - - if (errno == EEXIST) - { - ::unlink(sentinel_path_.c_str()); - decrement_refcount_relaxed(); - continue; - } - - break; - } - - sentinel_path_.clear(); - return false; - } - - void remove_sentinel_file() noexcept { - if (!sentinel_path_.empty()) - { - ::unlink(sentinel_path_.c_str()); - sentinel_path_.clear(); - } - } - - static bool pid_is_alive(pid_t pid) noexcept { - if (pid <= 0) - return false; - - if (kill(pid, 0) == 0) - return true; - - return errno == EPERM; - } - - [[nodiscard]] bool initialize_shared_mutex() noexcept { - if (!header_ptr_) - return false; - - pthread_mutexattr_t attr; - if (pthread_mutexattr_init(&attr) != 0) - return false; - - bool success = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) == 0; -#if _POSIX_C_SOURCE >= 200809L - if (success) - success = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) == 0; -#endif - - if (success) - success = pthread_mutex_init(&header_ptr_->mutex, &attr) == 0; - - pthread_mutexattr_destroy(&attr); - return success; - } - - [[nodiscard]] bool lock_shared_mutex() noexcept { - if (!header_ptr_) - return false; - - while (true) - { - int rc = pthread_mutex_lock(&header_ptr_->mutex); - if (rc == 0) - return true; - -#if _POSIX_C_SOURCE >= 200809L - if (rc == EOWNERDEAD) - { - if (pthread_mutex_consistent(&header_ptr_->mutex) == 0) - return true; - return false; - } -#endif - - if (rc == EINTR) - 
continue; - - return false; - } - } - - void unlock_shared_mutex() noexcept { - if (header_ptr_) - pthread_mutex_unlock(&header_ptr_->mutex); - } - - bool has_other_live_sentinels_locked() const noexcept { - DIR* dir = opendir("/dev/shm"); - if (!dir) - return false; - - std::string prefix = sentinel_base_ + "."; - bool found = false; - - while (dirent* entry = readdir(dir)) - { - std::string name = entry->d_name; - if (name.rfind(prefix, 0) != 0) - continue; - - auto pid_str = name.substr(prefix.size()); - char* end = nullptr; - long value = std::strtol(pid_str.c_str(), &end, 10); - if (!end || *end != '\0') - continue; - - pid_t pid = static_cast(value); - if (pid_is_alive(pid)) - { - found = true; - break; - } - - std::string stale_path = std::string("/dev/shm/") + name; - ::unlink(stale_path.c_str()); - const_cast(this)->decrement_refcount_relaxed(); - } - - closedir(dir); - return found; - } - - [[nodiscard]] bool setup_new_region(const T& initial_value) noexcept { - if (ftruncate(fd_, static_cast(total_size_)) == -1) - return false; - - if (detail::portable_fallocate(fd_, 0, static_cast(total_size_)) != 0) - return false; - - mapped_ptr_ = mmap(nullptr, total_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0); - if (mapped_ptr_ == MAP_FAILED) - { - mapped_ptr_ = nullptr; - return false; - } - - data_ptr_ = static_cast(mapped_ptr_); - header_ptr_ = - reinterpret_cast(static_cast(mapped_ptr_) + sizeof(T)); - - new (header_ptr_) detail::ShmHeader{}; - new (data_ptr_) T{initial_value}; - - if (!initialize_shared_mutex()) - return false; - - header_ptr_->ref_count.store(0, std::memory_order_release); - header_ptr_->initialized.store(true, std::memory_order_release); - return true; - } - - [[nodiscard]] bool setup_existing_region(bool& invalid_header) noexcept { - invalid_header = false; - - struct stat st; - fstat(fd_, &st); - if (static_cast(st.st_size) < total_size_) - { - invalid_header = true; - return false; - } - - mapped_ptr_ = mmap(nullptr, total_size_, 
PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0); - if (mapped_ptr_ == MAP_FAILED) - { - mapped_ptr_ = nullptr; - return false; - } - - data_ptr_ = static_cast(mapped_ptr_); - header_ptr_ = std::launder( - reinterpret_cast(static_cast(mapped_ptr_) + sizeof(T))); - - if (!header_ptr_->initialized.load(std::memory_order_acquire) - || header_ptr_->magic != detail::ShmHeader::SHM_MAGIC) - { - invalid_header = true; - unmap_region(); - return false; - } - - return true; - } -}; - -template -[[nodiscard]] std::optional> create_shared(const std::string& name, - const T& initial_value) noexcept { - SharedMemory shm(name); - if (shm.open(initial_value)) - return shm; - return std::nullopt; -} - -} // namespace Stockfish::shm - -#endif // #ifndef SHM_LINUX_H_INCLUDED diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp deleted file mode 100644 index 9fe6df9dca032134f7750ec84260b5658e25864b..0000000000000000000000000000000000000000 --- a/src/syzygy/tbprobe.cpp +++ /dev/null @@ -1,1776 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "tbprobe.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../bitboard.h" -#include "../misc.h" -#include "../movegen.h" -#include "../position.h" -#include "../search.h" -#include "../types.h" -#include "../ucioption.h" - -#ifndef _WIN32 - #include - #include - #include -#else - #define WIN32_LEAN_AND_MEAN - #ifndef NOMINMAX - #define NOMINMAX // Disable macros min() and max() - #endif - #include -#endif - -using namespace Stockfish::Tablebases; - -int Stockfish::Tablebases::MaxCardinality; - -namespace Stockfish { - -namespace { - -constexpr int TBPIECES = 7; // Max number of supported pieces -constexpr int MAX_DTZ = - 1 << 18; // Max DTZ supported times 2, large enough to deal with the syzygy TB limit. - -enum { - BigEndian, - LittleEndian -}; -enum TBType { - WDL, - DTZ -}; // Used as template parameter - -// Each table has a set of flags: all of them refer to DTZ tables, the last one to WDL tables -enum TBFlag { - STM = 1, - Mapped = 2, - WinPlies = 4, - LossPlies = 8, - Wide = 16, - SingleValue = 128 -}; - -inline WDLScore operator-(WDLScore d) { return WDLScore(-int(d)); } -inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } - -constexpr std::string_view PieceToChar = " PNBRQK pnbrqk"; - -int MapPawns[SQUARE_NB]; -int MapB1H1H7[SQUARE_NB]; -int MapA1D1D4[SQUARE_NB]; -int MapKK[10][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB] - -int Binomial[6][SQUARE_NB]; // [k][n] k elements from a set of n elements -int LeadPawnIdx[6][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB] -int LeadPawnsSize[6][4]; // [leadPawnsCnt][FILE_A..FILE_D] - -// Comparison function to sort leading pawns in ascending MapPawns[] order -bool pawns_comp(Square i, Square j) { return MapPawns[i] < MapPawns[j]; } -int off_A1H8(Square sq) { return int(rank_of(sq)) - file_of(sq); } - -constexpr Value WDL_to_value[] = {-VALUE_MATE 
+ MAX_PLY + 1, VALUE_DRAW - 2, VALUE_DRAW, - VALUE_DRAW + 2, VALUE_MATE - MAX_PLY - 1}; - -template -inline void swap_endian(T& x) { - static_assert(std::is_unsigned_v, "Argument of swap_endian not unsigned"); - - uint8_t tmp, *c = (uint8_t*) &x; - for (int i = 0; i < Half; ++i) - tmp = c[i], c[i] = c[End - i], c[End - i] = tmp; -} -template<> -inline void swap_endian(uint8_t&) {} - -template -T number(void* addr) { - T v; - - if (uintptr_t(addr) & (alignof(T) - 1)) // Unaligned pointer (very rare) - std::memcpy(&v, addr, sizeof(T)); - else - v = *((T*) addr); - - if (LE != IsLittleEndian) - swap_endian(v); - return v; -} - -// DTZ tables don't store valid scores for moves that reset the rule50 counter -// like captures and pawn moves but we can easily recover the correct dtz of the -// previous move if we know the position's WDL score. -int dtz_before_zeroing(WDLScore wdl) { - return wdl == WDLWin ? 1 - : wdl == WDLCursedWin ? 101 - : wdl == WDLBlessedLoss ? -101 - : wdl == WDLLoss ? -1 - : 0; -} - -// Return the sign of a number (-1, 0, 1) -template -int sign_of(T val) { - return (T(0) < val) - (val < T(0)); -} - -// Numbers in little-endian used by sparseIndex[] to point into blockLength[] -struct SparseEntry { - char block[4]; // Number of block - char offset[2]; // Offset within the block -}; - -static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes"); - -using Sym = uint16_t; // Huffman symbol - -struct LR { - enum Side { - Left, - Right - }; - - uint8_t lr[3]; // The first 12 bits is the left-hand symbol, the second 12 - // bits is the right-hand symbol. If the symbol has length 1, - // then the left-hand symbol is the stored value. - template - Sym get() { - return S == Left ? ((lr[1] & 0xF) << 8) | lr[0] - : S == Right ? 
(lr[2] << 4) | (lr[1] >> 4) - : (assert(false), Sym(-1)); - } -}; - -static_assert(sizeof(LR) == 3, "LR tree entry must be 3 bytes"); - -// Tablebases data layout is structured as following: -// -// TBFile: memory maps/unmaps the physical .rtbw and .rtbz files -// TBTable: one object for each file with corresponding indexing information -// TBTables: has ownership of TBTable objects, keeping a list and a hash - -// class TBFile memory maps/unmaps the single .rtbw and .rtbz files. Files are -// memory mapped for best performance. Files are mapped at first access: at init -// time only existence of the file is checked. -class TBFile: public std::ifstream { - - std::string fname; - - public: - // Look for and open the file among the Paths directories where the .rtbw - // and .rtbz files can be found. Multiple directories are separated by ";" - // on Windows and by ":" on Unix-based operating systems. - // - // Example: - // C:\tb\wdl345;C:\tb\wdl6;D:\tb\dtz345;D:\tb\dtz6 - static std::string Paths; - - TBFile(const std::string& f) { - -#ifndef _WIN32 - constexpr char SepChar = ':'; -#else - constexpr char SepChar = ';'; -#endif - std::stringstream ss(Paths); - std::string path; - - while (std::getline(ss, path, SepChar)) - { - fname = path + "/" + f; - std::ifstream::open(fname); - if (is_open()) - return; - } - } - - // Memory map the file and check it. 
- uint8_t* map(void** baseAddress, uint64_t* mapping, TBType type) { - if (is_open()) - close(); // Need to re-open to get native file descriptor - -#ifndef _WIN32 - struct stat statbuf; - int fd = ::open(fname.c_str(), O_RDONLY); - - if (fd == -1) - return *baseAddress = nullptr, nullptr; - - fstat(fd, &statbuf); - - if (statbuf.st_size % 64 != 16) - { - std::cerr << "Corrupt tablebase file " << fname << std::endl; - exit(EXIT_FAILURE); - } - - *mapping = statbuf.st_size; - *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); - #if defined(MADV_RANDOM) - madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); - #endif - ::close(fd); - - if (*baseAddress == MAP_FAILED) - { - std::cerr << "Could not mmap() " << fname << std::endl; - exit(EXIT_FAILURE); - } -#else - // Note FILE_FLAG_RANDOM_ACCESS is only a hint to Windows and as such may get ignored. - HANDLE fd = CreateFileA(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, - OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr); - - if (fd == INVALID_HANDLE_VALUE) - return *baseAddress = nullptr, nullptr; - - DWORD size_high; - DWORD size_low = GetFileSize(fd, &size_high); - - if (size_low % 64 != 16) - { - std::cerr << "Corrupt tablebase file " << fname << std::endl; - exit(EXIT_FAILURE); - } - - HANDLE mmap = CreateFileMapping(fd, nullptr, PAGE_READONLY, size_high, size_low, nullptr); - CloseHandle(fd); - - if (!mmap) - { - std::cerr << "CreateFileMapping() failed" << std::endl; - exit(EXIT_FAILURE); - } - - *mapping = uint64_t(mmap); - *baseAddress = MapViewOfFile(mmap, FILE_MAP_READ, 0, 0, 0); - - if (!*baseAddress) - { - std::cerr << "MapViewOfFile() failed, name = " << fname - << ", error = " << GetLastError() << std::endl; - exit(EXIT_FAILURE); - } -#endif - uint8_t* data = (uint8_t*) *baseAddress; - - constexpr uint8_t Magics[][4] = {{0xD7, 0x66, 0x0C, 0xA5}, {0x71, 0xE8, 0x23, 0x5D}}; - - if (memcmp(data, Magics[type == WDL], 4)) - { - std::cerr << "Corrupted table in file " << fname 
<< std::endl; - unmap(*baseAddress, *mapping); - return *baseAddress = nullptr, nullptr; - } - - return data + 4; // Skip Magics's header - } - - static void unmap(void* baseAddress, uint64_t mapping) { - -#ifndef _WIN32 - munmap(baseAddress, mapping); -#else - UnmapViewOfFile(baseAddress); - CloseHandle((HANDLE) mapping); -#endif - } -}; - -std::string TBFile::Paths; - -// struct PairsData contains low-level indexing information to access TB data. -// There are 8, 4, or 2 PairsData records for each TBTable, according to the type -// of table and if positions have pawns or not. It is populated at first access. -struct PairsData { - uint8_t flags; // Table flags, see enum TBFlag - uint8_t maxSymLen; // Maximum length in bits of the Huffman symbols - uint8_t minSymLen; // Minimum length in bits of the Huffman symbols - uint32_t blocksNum; // Number of blocks in the TB file - size_t sizeofBlock; // Block size in bytes - size_t span; // About every span values there is a SparseIndex[] entry - Sym* lowestSym; // lowestSym[l] is the symbol of length l with the lowest value - LR* btree; // btree[sym] stores the left and right symbols that expand sym - uint16_t* blockLength; // Number of stored positions (minus one) for each block: 1..65536 - uint32_t blockLengthSize; // Size of blockLength[] table: padded so it's bigger than blocksNum - SparseEntry* sparseIndex; // Partial indices into blockLength[] - size_t sparseIndexSize; // Size of SparseIndex[] table - uint8_t* data; // Start of Huffman compressed data - std::vector - base64; // base64[l - min_sym_len] is the 64bit-padded lowest symbol of length l - std::vector - symlen; // Number of values (-1) represented by a given Huffman symbol: 1..256 - Piece pieces[TBPIECES]; // Position pieces: the order of pieces defines the groups - uint64_t groupIdx[TBPIECES + 1]; // Start index used for the encoding of the group's pieces - int groupLen[TBPIECES + 1]; // Number of pieces in a given group: KRKN -> (3, 1) - uint16_t 
map_idx[4]; // WDLWin, WDLLoss, WDLCursedWin, WDLBlessedLoss (used in DTZ) -}; - -// struct TBTable contains indexing information to access the corresponding TBFile. -// There are 2 types of TBTable, corresponding to a WDL or a DTZ file. TBTable -// is populated at init time but the nested PairsData records are populated at -// first access, when the corresponding file is memory mapped. -template -struct TBTable { - using Ret = std::conditional_t; - - static constexpr int Sides = Type == WDL ? 2 : 1; - - std::atomic_bool ready; - void* baseAddress; - uint8_t* map; - uint64_t mapping; - Key key; - Key key2; - int pieceCount; - bool hasPawns; - bool hasUniquePieces; - uint8_t pawnCount[2]; // [Lead color / other color] - PairsData items[Sides][4]; // [wtm / btm][FILE_A..FILE_D or 0] - - PairsData* get(int stm, int f) { return &items[stm % Sides][hasPawns ? f : 0]; } - - TBTable() : - ready(false), - baseAddress(nullptr) {} - explicit TBTable(const std::string& code); - explicit TBTable(const TBTable& wdl); - - ~TBTable() { - if (baseAddress) - TBFile::unmap(baseAddress, mapping); - } -}; - -template<> -TBTable::TBTable(const std::string& code) : - TBTable() { - - StateInfo st; - Position pos; - - key = pos.set(code, WHITE, &st).material_key(); - pieceCount = pos.count(); - hasPawns = pos.pieces(PAWN); - - hasUniquePieces = false; - for (Color c : {WHITE, BLACK}) - for (PieceType pt = PAWN; pt < KING; ++pt) - if (popcount(pos.pieces(c, pt)) == 1) - hasUniquePieces = true; - - // Set the leading color. In case both sides have pawns the leading color - // is the side with fewer pawns because this leads to better compression. - bool c = !pos.count(BLACK) - || (pos.count(WHITE) && pos.count(BLACK) >= pos.count(WHITE)); - - pawnCount[0] = pos.count(c ? WHITE : BLACK); - pawnCount[1] = pos.count(c ? 
BLACK : WHITE); - - key2 = pos.set(code, BLACK, &st).material_key(); -} - -template<> -TBTable::TBTable(const TBTable& wdl) : - TBTable() { - - // Use the corresponding WDL table to avoid recalculating all from scratch - key = wdl.key; - key2 = wdl.key2; - pieceCount = wdl.pieceCount; - hasPawns = wdl.hasPawns; - hasUniquePieces = wdl.hasUniquePieces; - pawnCount[0] = wdl.pawnCount[0]; - pawnCount[1] = wdl.pawnCount[1]; -} - -// class TBTables creates and keeps ownership of the TBTable objects, one for -// each TB file found. It supports a fast, hash-based, table lookup. Populated -// at init time, accessed at probe time. -class TBTables { - - struct Entry { - Key key; - TBTable* wdl; - TBTable* dtz; - - template - TBTable* get() const { - return (TBTable*) (Type == WDL ? (void*) wdl : (void*) dtz); - } - }; - - static constexpr int Size = 1 << 12; // 4K table, indexed by key's 12 lsb - static constexpr int Overflow = 1; // Number of elements allowed to map to the last bucket - - Entry hashTable[Size + Overflow]; - - std::deque> wdlTable; - std::deque> dtzTable; - size_t foundDTZFiles = 0; - size_t foundWDLFiles = 0; - - void insert(Key key, TBTable* wdl, TBTable* dtz) { - uint32_t homeBucket = uint32_t(key) & (Size - 1); - Entry entry{key, wdl, dtz}; - - // Ensure last element is empty to avoid overflow when looking up - for (uint32_t bucket = homeBucket; bucket < Size + Overflow - 1; ++bucket) - { - Key otherKey = hashTable[bucket].key; - if (otherKey == key || !hashTable[bucket].get()) - { - hashTable[bucket] = entry; - return; - } - - // Robin Hood hashing: If we've probed for longer than this element, - // insert here and search for a new spot for the other element instead. - uint32_t otherHomeBucket = uint32_t(otherKey) & (Size - 1); - if (otherHomeBucket > homeBucket) - { - std::swap(entry, hashTable[bucket]); - key = otherKey; - homeBucket = otherHomeBucket; - } - } - std::cerr << "TB hash table size too low!" 
<< std::endl; - exit(EXIT_FAILURE); - } - - public: - template - TBTable* get(Key key) { - for (const Entry* entry = &hashTable[uint32_t(key) & (Size - 1)];; ++entry) - { - if (entry->key == key || !entry->get()) - return entry->get(); - } - } - - void clear() { - memset(hashTable, 0, sizeof(hashTable)); - wdlTable.clear(); - dtzTable.clear(); - foundDTZFiles = 0; - foundWDLFiles = 0; - } - - void info() const { - sync_cout << "info string Found " << foundWDLFiles << " WDL and " << foundDTZFiles - << " DTZ tablebase files (up to " << MaxCardinality << "-man)." << sync_endl; - } - - void add(const std::vector& pieces); -}; - -TBTables TBTables; - -// If the corresponding file exists two new objects TBTable and TBTable -// are created and added to the lists and hash table. Called at init time. -void TBTables::add(const std::vector& pieces) { - - std::string code; - - for (PieceType pt : pieces) - code += PieceToChar[pt]; - code.insert(code.find('K', 1), "v"); - - TBFile file_dtz(code + ".rtbz"); // KRK -> KRvK - if (file_dtz.is_open()) - { - file_dtz.close(); - foundDTZFiles++; - } - - TBFile file(code + ".rtbw"); // KRK -> KRvK - - if (!file.is_open()) // Only WDL file is checked - return; - - file.close(); - foundWDLFiles++; - - MaxCardinality = std::max(int(pieces.size()), MaxCardinality); - - wdlTable.emplace_back(code); - dtzTable.emplace_back(wdlTable.back()); - - // Insert into the hash keys for both colors: KRvK with KR white and black - insert(wdlTable.back().key, &wdlTable.back(), &dtzTable.back()); - insert(wdlTable.back().key2, &wdlTable.back(), &dtzTable.back()); -} - -// TB tables are compressed with canonical Huffman code. The compressed data is divided into -// blocks of size d->sizeofBlock, and each block stores a variable number of symbols. -// Each symbol represents either a WDL or a (remapped) DTZ value, or a pair of other symbols -// (recursively). If you keep expanding the symbols in a block, you end up with up to 65536 -// WDL or DTZ values. 
Each symbol represents up to 256 values and will correspond after -// Huffman coding to at least 1 bit. So a block of 32 bytes corresponds to at most -// 32 x 8 x 256 = 65536 values. This maximum is only reached for tables that consist mostly -// of draws or mostly of wins, but such tables are actually quite common. In principle, the -// blocks in WDL tables are 64 bytes long (and will be aligned on cache lines). But for -// mostly-draw or mostly-win tables this can leave many 64-byte blocks only half-filled, so -// in such cases blocks are 32 bytes long. The blocks of DTZ tables are up to 1024 bytes long. -// The generator picks the size that leads to the smallest table. The "book" of symbols and -// Huffman codes are the same for all blocks in the table. A non-symmetric pawnless TB file -// will have one table for wtm and one for btm, a TB file with pawns will have tables per -// file a,b,c,d also, in this case, one set for wtm and one for btm. -int decompress_pairs(PairsData* d, uint64_t idx) { - - // Special case where all table positions store the same value - if (d->flags & TBFlag::SingleValue) - return d->minSymLen; - - // First we need to locate the right block that stores the value at index "idx". - // Because each block n stores blockLength[n] + 1 values, the index i of the block - // that contains the value at position idx is: - // - // for (i = -1, sum = 0; sum <= idx; i++) - // sum += blockLength[i + 1] + 1; - // - // This can be slow, so we use SparseIndex[] populated with a set of SparseEntry that - // point to known indices into blockLength[]. 
Namely SparseIndex[k] is a SparseEntry - // that stores the blockLength[] index and the offset within that block of the value - // with index I(k), where: - // - // I(k) = k * d->span + d->span / 2 (1) - - // First step is to get the 'k' of the I(k) nearest to our idx, using definition (1) - uint32_t k = uint32_t(idx / d->span); - - // Then we read the corresponding SparseIndex[] entry - uint32_t block = number(&d->sparseIndex[k].block); - int offset = number(&d->sparseIndex[k].offset); - - // Now compute the difference idx - I(k). From the definition of k, we know that - // - // idx = k * d->span + idx % d->span (2) - // - // So from (1) and (2) we can compute idx - I(K): - int diff = int(idx % d->span - d->span / 2); - - // Sum the above to offset to find the offset corresponding to our idx - offset += diff; - - // Move to the previous/next block, until we reach the correct block that contains idx, - // that is when 0 <= offset <= d->blockLength[block] - while (offset < 0) - offset += d->blockLength[--block] + 1; - - while (offset > d->blockLength[block]) - offset -= d->blockLength[block++] + 1; - - // Finally, we find the start address of our block of canonical Huffman symbols - uint32_t* ptr = (uint32_t*) (d->data + (uint64_t(block) * d->sizeofBlock)); - - // Read the first 64 bits in our block, this is a (truncated) sequence of - // unknown number of symbols of unknown length but we know the first one - // is at the beginning of this 64-bit sequence. - uint64_t buf64 = number(ptr); - ptr += 2; - int buf64Size = 64; - Sym sym; - - while (true) - { - int len = 0; // This is the symbol length - d->min_sym_len - - // Now get the symbol length. For any symbol s64 of length l right-padded - // to 64 bits we know that d->base64[l-1] >= s64 >= d->base64[l] so we - // can find the symbol length iterating through base64[]. 
- while (buf64 < d->base64[len]) - ++len; - - // All the symbols of a given length are consecutive integers (numerical - // sequence property), so we can compute the offset of our symbol of - // length len, stored at the beginning of buf64. - sym = Sym((buf64 - d->base64[len]) >> (64 - len - d->minSymLen)); - - // Now add the value of the lowest symbol of length len to get our symbol - sym += number(&d->lowestSym[len]); - - // If our offset is within the number of values represented by symbol sym, - // we are done. - if (offset < d->symlen[sym] + 1) - break; - - // ...otherwise update the offset and continue to iterate - offset -= d->symlen[sym] + 1; - len += d->minSymLen; // Get the real length - buf64 <<= len; // Consume the just processed symbol - buf64Size -= len; - - if (buf64Size <= 32) - { // Refill the buffer - buf64Size += 32; - buf64 |= uint64_t(number(ptr++)) << (64 - buf64Size); - } - } - - // Now we have our symbol that expands into d->symlen[sym] + 1 symbols. - // We binary-search for our value recursively expanding into the left and - // right child symbols until we reach a leaf node where symlen[sym] + 1 == 1 - // that will store the value we need. - while (d->symlen[sym]) - { - Sym left = d->btree[sym].get(); - - // If a symbol contains 36 sub-symbols (d->symlen[sym] + 1 = 36) and - // expands in a pair (d->symlen[left] = 23, d->symlen[right] = 11), then - // we know that, for instance, the tenth value (offset = 10) will be on - // the left side because in Recursive Pairing child symbols are adjacent. 
- if (offset < d->symlen[left] + 1) - sym = left; - else - { - offset -= d->symlen[left] + 1; - sym = d->btree[sym].get(); - } - } - - return d->btree[sym].get(); -} - -bool check_dtz_stm(TBTable*, int, File) { return true; } - -bool check_dtz_stm(TBTable* entry, int stm, File f) { - - auto flags = entry->get(stm, f)->flags; - return (flags & TBFlag::STM) == stm || ((entry->key == entry->key2) && !entry->hasPawns); -} - -// DTZ scores are sorted by frequency of occurrence and then assigned the -// values 0, 1, 2, ... in order of decreasing frequency. This is done for each -// of the four WDLScore values. The mapping information necessary to reconstruct -// the original values are stored in the TB file and read during map[] init. -WDLScore map_score(TBTable*, File, int value, WDLScore) { return WDLScore(value - 2); } - -int map_score(TBTable* entry, File f, int value, WDLScore wdl) { - - constexpr int WDLMap[] = {1, 3, 0, 2, 0}; - - auto flags = entry->get(0, f)->flags; - - uint8_t* map = entry->map; - uint16_t* idx = entry->get(0, f)->map_idx; - if (flags & TBFlag::Mapped) - { - if (flags & TBFlag::Wide) - value = ((uint16_t*) map)[idx[WDLMap[wdl + 2]] + value]; - else - value = map[idx[WDLMap[wdl + 2]] + value]; - } - - // DTZ tables store distance to zero in number of moves or plies. We - // want to return plies, so we have to convert to plies when needed. - if ((wdl == WDLWin && !(flags & TBFlag::WinPlies)) - || (wdl == WDLLoss && !(flags & TBFlag::LossPlies)) || wdl == WDLCursedWin - || wdl == WDLBlessedLoss) - value *= 2; - - return value + 1; -} - -// A temporary fix for the compiler bug with vectorization. (#4450) -#if defined(__clang__) && defined(__clang_major__) && __clang_major__ >= 15 - #define DISABLE_CLANG_LOOP_VEC _Pragma("clang loop vectorize(disable)") -#else - #define DISABLE_CLANG_LOOP_VEC -#endif - -// Compute a unique index out of a position and use it to probe the TB file. 
To -// encode k pieces of the same type and color, first sort the pieces by square in -// ascending order s1 <= s2 <= ... <= sk then compute the unique index as: -// -// idx = Binomial[1][s1] + Binomial[2][s2] + ... + Binomial[k][sk] -// -template -Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* result) { - - Square squares[TBPIECES]; - Piece pieces[TBPIECES]; - uint64_t idx; - int next = 0, size = 0, leadPawnsCnt = 0; - PairsData* d; - Bitboard b, leadPawns = 0; - File tbFile = FILE_A; - - // A given TB entry like KRK has associated two material keys: KRvk and Kvkr. - // If both sides have the same pieces keys are equal. In this case TB tables - // only stores the 'white to move' case, so if the position to lookup has black - // to move, we need to switch the color and flip the squares before to lookup. - bool symmetricBlackToMove = (entry->key == entry->key2 && pos.side_to_move()); - - // TB files are calculated for white as the stronger side. For instance, we - // have KRvK, not KvKR. A position where the stronger side is white will have - // its material key == entry->key, otherwise we have to switch the color and - // flip the squares before to lookup. - bool blackStronger = (pos.material_key() != entry->key); - - int flipColor = (symmetricBlackToMove || blackStronger) * 8; - int flipSquares = (symmetricBlackToMove || blackStronger) * 56; - int stm = (symmetricBlackToMove || blackStronger) ^ pos.side_to_move(); - - // For pawns, TB files store 4 separate tables according if leading pawn is on - // file a, b, c or d after reordering. The leading pawn is the one with maximum - // MapPawns[] value, that is the one most toward the edges and with lowest rank. - if (entry->hasPawns) - { - - // In all the 4 tables, pawns are at the beginning of the piece sequence and - // their color is the reference one. So we just pick the first one. 
- Piece pc = Piece(entry->get(0, 0)->pieces[0] ^ flipColor); - - assert(type_of(pc) == PAWN); - - leadPawns = b = pos.pieces(color_of(pc), PAWN); - do - squares[size++] = pop_lsb(b) ^ flipSquares; - while (b); - - leadPawnsCnt = size; - - std::swap(squares[0], *std::max_element(squares, squares + leadPawnsCnt, pawns_comp)); - - tbFile = File(edge_distance(file_of(squares[0]))); - } - - // DTZ tables are one-sided, i.e. they store positions only for white to - // move or only for black to move, so check for side to move to be stm, - // early exit otherwise. - if (!check_dtz_stm(entry, stm, tbFile)) - return *result = CHANGE_STM, Ret(); - - // Now we are ready to get all the position pieces (but the lead pawns) and - // directly map them to the correct color and square. - b = pos.pieces() ^ leadPawns; - do - { - Square s = pop_lsb(b); - squares[size] = s ^ flipSquares; - pieces[size++] = Piece(pos.piece_on(s) ^ flipColor); - } while (b); - - assert(size >= 2); - - d = entry->get(stm, tbFile); - - // Then we reorder the pieces to have the same sequence as the one stored - // in pieces[i]: the sequence that ensures the best compression. - for (int i = leadPawnsCnt; i < size - 1; ++i) - for (int j = i + 1; j < size; ++j) - if (d->pieces[i] == pieces[j]) - { - std::swap(pieces[i], pieces[j]); - std::swap(squares[i], squares[j]); - break; - } - - // Now we map again the squares so that the square of the lead piece is in - // the triangle A1-D1-D4. - if (file_of(squares[0]) > FILE_D) - { - DISABLE_CLANG_LOOP_VEC - for (int i = 0; i < size; ++i) - squares[i] = flip_file(squares[i]); - } - - // Encode leading pawns starting with the one with minimum MapPawns[] and - // proceeding in ascending order. 
- if (entry->hasPawns) - { - idx = LeadPawnIdx[leadPawnsCnt][squares[0]]; - - std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp); - - for (int i = 1; i < leadPawnsCnt; ++i) - idx += Binomial[i][MapPawns[squares[i]]]; - - goto encode_remaining; // With pawns we have finished special treatments - } - - // In positions without pawns, we further flip the squares to ensure leading - // piece is below RANK_5. - if (rank_of(squares[0]) > RANK_4) - { - DISABLE_CLANG_LOOP_VEC - for (int i = 0; i < size; ++i) - squares[i] = flip_rank(squares[i]); - } - - // Look for the first piece of the leading group not on the A1-D4 diagonal - // and ensure it is mapped below the diagonal. - DISABLE_CLANG_LOOP_VEC - for (int i = 0; i < d->groupLen[0]; ++i) - { - if (!off_A1H8(squares[i])) - continue; - - if (off_A1H8(squares[i]) > 0) // A1-H8 diagonal flip: SQ_A3 -> SQ_C1 - { - DISABLE_CLANG_LOOP_VEC - for (int j = i; j < size; ++j) - squares[j] = Square(((squares[j] >> 3) | (squares[j] << 3)) & 63); - } - break; - } - - // Encode the leading group. - // - // Suppose we have KRvK. Let's say the pieces are on square numbers wK, wR - // and bK (each 0...63). The simplest way to map this position to an index - // is like this: - // - // index = wK * 64 * 64 + wR * 64 + bK; - // - // But this way the TB is going to have 64*64*64 = 262144 positions, with - // lots of positions being equivalent (because they are mirrors of each - // other) and lots of positions being invalid (two pieces on one square, - // adjacent kings, etc.). - // Usually the first step is to take the wK and bK together. There are just - // 462 ways legal and not-mirrored ways to place the wK and bK on the board. 
- // Once we have placed the wK and bK, there are 62 squares left for the wR - // Mapping its square from 0..63 to available squares 0..61 can be done like: - // - // wR -= (wR > wK) + (wR > bK); - // - // In words: if wR "comes later" than wK, we deduct 1, and the same if wR - // "comes later" than bK. In case of two same pieces like KRRvK we want to - // place the two Rs "together". If we have 62 squares left, we can place two - // Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be - // swapped and still get the same position.) - // - // In case we have at least 3 unique pieces (including kings) we encode them - // together. - if (entry->hasUniquePieces) - { - - int adjust1 = squares[1] > squares[0]; - int adjust2 = (squares[2] > squares[0]) + (squares[2] > squares[1]); - - // First piece is below a1-h8 diagonal. MapA1D1D4[] maps the b1-d1-d3 - // triangle to 0...5. There are 63 squares for second piece and 62 - // (mapped to 0...61) for the third. - if (off_A1H8(squares[0])) - idx = (MapA1D1D4[squares[0]] * 63 + (squares[1] - adjust1)) * 62 + squares[2] - adjust2; - - // First piece is on a1-h8 diagonal, second below: map this occurrence to - // 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal - // to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27. - else if (off_A1H8(squares[1])) - idx = (6 * 63 + rank_of(squares[0]) * 28 + MapB1H1H7[squares[1]]) * 62 + squares[2] - - adjust2; - - // First two pieces are on a1-h8 diagonal, third below - else if (off_A1H8(squares[2])) - idx = 6 * 63 * 62 + 4 * 28 * 62 + rank_of(squares[0]) * 7 * 28 - + (rank_of(squares[1]) - adjust1) * 28 + MapB1H1H7[squares[2]]; - - // All 3 pieces on the diagonal a1-h8 - else - idx = 6 * 63 * 62 + 4 * 28 * 62 + 4 * 7 * 28 + rank_of(squares[0]) * 7 * 6 - + (rank_of(squares[1]) - adjust1) * 6 + (rank_of(squares[2]) - adjust2); - } - else - // We don't have at least 3 unique pieces, like in KRRvKBB, just map - // the kings. 
- idx = MapKK[MapA1D1D4[squares[0]]][squares[1]]; - -encode_remaining: - idx *= d->groupIdx[0]; - Square* groupSq = squares + d->groupLen[0]; - - // Encode remaining pawns and then pieces according to square, in ascending order - bool remainingPawns = entry->hasPawns && entry->pawnCount[1]; - - while (d->groupLen[++next]) - { - std::stable_sort(groupSq, groupSq + d->groupLen[next]); - uint64_t n = 0; - - // Map down a square if "comes later" than a square in the previous - // groups (similar to what was done earlier for leading group pieces). - for (int i = 0; i < d->groupLen[next]; ++i) - { - auto f = [&](Square s) { return groupSq[i] > s; }; - auto adjust = std::count_if(squares, groupSq, f); - n += Binomial[i + 1][groupSq[i] - adjust - 8 * remainingPawns]; - } - - remainingPawns = false; - idx += n * d->groupIdx[next]; - groupSq += d->groupLen[next]; - } - - // Now that we have the index, decompress the pair and get the score - return map_score(entry, tbFile, decompress_pairs(d, idx), wdl); -} - -// Group together pieces that will be encoded together. The general rule is that -// a group contains pieces of the same type and color. The exception is the leading -// group that, in case of positions without pawns, can be formed by 3 different -// pieces (default) or by the king pair when there is not a unique piece apart -// from the kings. When there are pawns, pawns are always first in pieces[]. -// -// As example KRKN -> KRK + N, KNNK -> KK + NN, KPPKP -> P + PP + K + K -// -// The actual grouping depends on the TB generator and can be inferred from the -// sequence of pieces in piece[] array. -template -void set_groups(T& e, PairsData* d, int order[], File f) { - - int n = 0, firstLen = e.hasPawns ? 0 : e.hasUniquePieces ? 3 : 2; - d->groupLen[n] = 1; - - // Number of pieces per group is stored in groupLen[], for instance in KRKN - // the encoder will default on '111', so groupLen[] will be (3, 1). 
- for (int i = 1; i < e.pieceCount; ++i) - if (--firstLen > 0 || d->pieces[i] == d->pieces[i - 1]) - d->groupLen[n]++; - else - d->groupLen[++n] = 1; - - d->groupLen[++n] = 0; // Zero-terminated - - // The sequence in pieces[] defines the groups, but not the order in which - // they are encoded. If the pieces in a group g can be combined on the board - // in N(g) different ways, then the position encoding will be of the form: - // - // g1 * N(g2) * N(g3) + g2 * N(g3) + g3 - // - // This ensures unique encoding for the whole position. The order of the - // groups is a per-table parameter and could not follow the canonical leading - // pawns/pieces -> remaining pawns -> remaining pieces. In particular the - // first group is at order[0] position and the remaining pawns, when present, - // are at order[1] position. - bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides - int next = pp ? 2 : 1; - int freeSquares = 64 - d->groupLen[0] - (pp ? d->groupLen[1] : 0); - uint64_t idx = 1; - - for (int k = 0; next < n || k == order[0] || k == order[1]; ++k) - if (k == order[0]) // Leading pawns or pieces - { - d->groupIdx[0] = idx; - idx *= e.hasPawns ? LeadPawnsSize[d->groupLen[0]][f] : e.hasUniquePieces ? 31332 : 462; - } - else if (k == order[1]) // Remaining pawns - { - d->groupIdx[1] = idx; - idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]]; - } - else // Remaining pieces - { - d->groupIdx[next] = idx; - idx *= Binomial[d->groupLen[next]][freeSquares]; - freeSquares -= d->groupLen[next++]; - } - - d->groupIdx[n] = idx; -} - -// In Recursive Pairing each symbol represents a pair of children symbols. So -// read d->btree[] symbols data and expand each one in his left and right child -// symbol until reaching the leaves that represent the symbol value. 
-uint8_t set_symlen(PairsData* d, Sym s, std::vector& visited) { - - visited[s] = true; // We can set it now because tree is acyclic - Sym sr = d->btree[s].get(); - - if (sr == 0xFFF) - return 0; - - Sym sl = d->btree[s].get(); - - if (!visited[sl]) - d->symlen[sl] = set_symlen(d, sl, visited); - - if (!visited[sr]) - d->symlen[sr] = set_symlen(d, sr, visited); - - return d->symlen[sl] + d->symlen[sr] + 1; -} - -uint8_t* set_sizes(PairsData* d, uint8_t* data) { - - d->flags = *data++; - - if (d->flags & TBFlag::SingleValue) - { - d->blocksNum = d->blockLengthSize = 0; - d->span = d->sparseIndexSize = 0; // Broken MSVC zero-init - d->minSymLen = *data++; // Here we store the single value - return data; - } - - // groupLen[] is a zero-terminated list of group lengths, the last groupIdx[] - // element stores the biggest index that is the tb size. - uint64_t tbSize = d->groupIdx[std::find(d->groupLen, d->groupLen + 7, 0) - d->groupLen]; - - d->sizeofBlock = 1ULL << *data++; - d->span = 1ULL << *data++; - d->sparseIndexSize = size_t((tbSize + d->span - 1) / d->span); // Round up - auto padding = number(data++); - d->blocksNum = number(data); - data += sizeof(uint32_t); - d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[] - // does not point out of range. - d->maxSymLen = *data++; - d->minSymLen = *data++; - d->lowestSym = (Sym*) data; - d->base64.resize(d->maxSymLen - d->minSymLen + 1); - - // See https://en.wikipedia.org/wiki/Huffman_coding - // The canonical code is ordered such that longer symbols (in terms of - // the number of bits of their Huffman code) have a lower numeric value, - // so that d->lowestSym[i] >= d->lowestSym[i+1] (when read as LittleEndian). - // Starting from this we compute a base64[] table indexed by symbol length - // and containing 64 bit values so that d->base64[i] >= d->base64[i+1]. 
- - // Implementation note: we first cast the unsigned size_t "base64.size()" - // to a signed int "base64_size" variable and then we are able to subtract 2, - // avoiding unsigned overflow warnings. - - int base64_size = static_cast(d->base64.size()); - for (int i = base64_size - 2; i >= 0; --i) - { - d->base64[i] = (d->base64[i + 1] + number(&d->lowestSym[i]) - - number(&d->lowestSym[i + 1])) - / 2; - - assert(d->base64[i] * 2 >= d->base64[i + 1]); - } - - // Now left-shift by an amount so that d->base64[i] gets shifted 1 bit more - // than d->base64[i+1] and given the above assert condition, we ensure that - // d->base64[i] >= d->base64[i+1]. Moreover for any symbol s64 of length i - // and right-padded to 64 bits holds d->base64[i-1] >= s64 >= d->base64[i]. - for (int i = 0; i < base64_size; ++i) - d->base64[i] <<= 64 - i - d->minSymLen; // Right-padding to 64 bits - - data += base64_size * sizeof(Sym); - d->symlen.resize(number(data)); - data += sizeof(uint16_t); - d->btree = (LR*) data; - - // The compression scheme used is "Recursive Pairing", that replaces the most - // frequent adjacent pair of symbols in the source message by a new symbol, - // reevaluating the frequencies of all of the symbol pairs with respect to - // the extended alphabet, and then repeating the process. 
- // See https://web.archive.org/web/20201106232444/http://www.larsson.dogma.net/dcc99.pdf - std::vector visited(d->symlen.size()); - - for (Sym sym = 0; sym < d->symlen.size(); ++sym) - if (!visited[sym]) - d->symlen[sym] = set_symlen(d, sym, visited); - - return data + d->symlen.size() * sizeof(LR) + (d->symlen.size() & 1); -} - -uint8_t* set_dtz_map(TBTable&, uint8_t* data, File) { return data; } - -uint8_t* set_dtz_map(TBTable& e, uint8_t* data, File maxFile) { - - e.map = data; - - for (File f = FILE_A; f <= maxFile; ++f) - { - auto flags = e.get(0, f)->flags; - if (flags & TBFlag::Mapped) - { - if (flags & TBFlag::Wide) - { - data += uintptr_t(data) & 1; // Word alignment, we may have a mixed table - for (int i = 0; i < 4; ++i) - { // Sequence like 3,x,x,x,1,x,0,2,x,x - e.get(0, f)->map_idx[i] = uint16_t((uint16_t*) data - (uint16_t*) e.map + 1); - data += 2 * number(data) + 2; - } - } - else - { - for (int i = 0; i < 4; ++i) - { - e.get(0, f)->map_idx[i] = uint16_t(data - e.map + 1); - data += *data + 1; - } - } - } - } - - return data += uintptr_t(data) & 1; // Word alignment -} - -// Populate entry's PairsData records with data from the just memory-mapped file. -// Called at first access. -template -void set(T& e, uint8_t* data) { - - PairsData* d; - - enum { - Split = 1, - HasPawns = 2 - }; - - assert(e.hasPawns == bool(*data & HasPawns)); - assert((e.key != e.key2) == bool(*data & Split)); - - data++; // First byte stores flags - - const int sides = T::Sides == 2 && (e.key != e.key2) ? 2 : 1; - const File maxFile = e.hasPawns ? FILE_D : FILE_A; - - bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides - - assert(!pp || e.pawnCount[0]); - - for (File f = FILE_A; f <= maxFile; ++f) - { - - for (int i = 0; i < sides; i++) - *e.get(i, f) = PairsData(); - - int order[][2] = {{*data & 0xF, pp ? *(data + 1) & 0xF : 0xF}, - {*data >> 4, pp ? 
*(data + 1) >> 4 : 0xF}}; - data += 1 + pp; - - for (int k = 0; k < e.pieceCount; ++k, ++data) - for (int i = 0; i < sides; i++) - e.get(i, f)->pieces[k] = Piece(i ? *data >> 4 : *data & 0xF); - - for (int i = 0; i < sides; ++i) - set_groups(e, e.get(i, f), order[i], f); - } - - data += uintptr_t(data) & 1; // Word alignment - - for (File f = FILE_A; f <= maxFile; ++f) - for (int i = 0; i < sides; i++) - data = set_sizes(e.get(i, f), data); - - data = set_dtz_map(e, data, maxFile); - - for (File f = FILE_A; f <= maxFile; ++f) - for (int i = 0; i < sides; i++) - { - (d = e.get(i, f))->sparseIndex = (SparseEntry*) data; - data += d->sparseIndexSize * sizeof(SparseEntry); - } - - for (File f = FILE_A; f <= maxFile; ++f) - for (int i = 0; i < sides; i++) - { - (d = e.get(i, f))->blockLength = (uint16_t*) data; - data += d->blockLengthSize * sizeof(uint16_t); - } - - for (File f = FILE_A; f <= maxFile; ++f) - for (int i = 0; i < sides; i++) - { - data = (uint8_t*) ((uintptr_t(data) + 0x3F) & ~0x3F); // 64 byte alignment - (d = e.get(i, f))->data = data; - data += d->blocksNum * d->sizeofBlock; - } -} - -// If the TB file corresponding to the given position is already memory-mapped -// then return its base address, otherwise, try to memory map and init it. Called -// at every probe, memory map, and init only at first access. Function is thread -// safe and can be called concurrently. -template -void* mapped(TBTable& e, const Position& pos) { - - static std::mutex mutex; - // Because TB is the only usage of materialKey, check it here in debug mode - assert(pos.material_key_is_ok()); - - // Use 'acquire' to avoid a thread reading 'ready' == true while - // another is still working. (compiler reordering may cause this). 
- if (e.ready.load(std::memory_order_acquire)) - return e.baseAddress; // Could be nullptr if file does not exist - - std::scoped_lock lk(mutex); - - if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock - return e.baseAddress; - - // Pieces strings in decreasing order for each color, like ("KPP","KR") - std::string fname, w, b; - for (PieceType pt = KING; pt >= PAWN; --pt) - { - w += std::string(popcount(pos.pieces(WHITE, pt)), PieceToChar[pt]); - b += std::string(popcount(pos.pieces(BLACK, pt)), PieceToChar[pt]); - } - - fname = - (e.key == pos.material_key() ? w + 'v' + b : b + 'v' + w) + (Type == WDL ? ".rtbw" : ".rtbz"); - - uint8_t* data = TBFile(fname).map(&e.baseAddress, &e.mapping, Type); - - if (data) - set(e, data); - - e.ready.store(true, std::memory_order_release); - return e.baseAddress; -} - -template::Ret> -Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) { - - if (pos.count() == 2) // KvK - return Ret(WDLDraw); - - TBTable* entry = TBTables.get(pos.material_key()); - - if (!entry || !mapped(*entry, pos)) - return *result = FAIL, Ret(); - - return do_probe_table(pos, entry, wdl, result); -} - -// For a position where the side to move has a winning capture it is not necessary -// to store a winning value so the generator treats such positions as "don't care" -// and tries to assign to it a value that improves the compression ratio. Similarly, -// if the side to move has a drawing capture, then the position is at least drawn. -// If the position is won, then the TB needs to store a win value. But if the -// position is drawn, the TB may store a loss value if that is better for compression. -// All of this means that during probing, the engine must look at captures and probe -// their results and must probe the position itself. The "best" result of these -// probes is the correct result for the position. 
-// DTZ tables do not store values when a following move is a zeroing winning move -// (winning capture or winning pawn move). Also, DTZ store wrong values for positions -// where the best move is an ep-move (even if losing). So in all these cases set -// the state to ZEROING_BEST_MOVE. -template -WDLScore search(Position& pos, ProbeState* result) { - - WDLScore value, bestValue = WDLLoss; - StateInfo st; - - auto moveList = MoveList(pos); - size_t totalCount = moveList.size(), moveCount = 0; - - for (const Move move : moveList) - { - if (!pos.capture(move) && (!CheckZeroingMoves || type_of(pos.moved_piece(move)) != PAWN)) - continue; - - moveCount++; - - pos.do_move(move, st); - value = -search(pos, result); - pos.undo_move(move); - - if (*result == FAIL) - return WDLDraw; - - if (value > bestValue) - { - bestValue = value; - - if (value >= WDLWin) - { - *result = ZEROING_BEST_MOVE; // Winning DTZ-zeroing move - return value; - } - } - } - - // In case we have already searched all the legal moves we don't have to probe - // the TB because the stored score could be wrong. For instance TB tables - // do not contain information on position with ep rights, so in this case - // the result of probe_wdl_table is wrong. Also in case of only capture - // moves, for instance here 4K3/4q3/6p1/2k5/6p1/8/8/8 w - - 0 7, we have to - // return with ZEROING_BEST_MOVE set. - bool noMoreMoves = (moveCount && moveCount == totalCount); - - if (noMoreMoves) - value = bestValue; - else - { - value = probe_table(pos, result); - - if (*result == FAIL) - return WDLDraw; - } - - // DTZ stores a "don't care" value if bestValue is a win - if (bestValue >= value) - return *result = (bestValue > WDLDraw || noMoreMoves ? ZEROING_BEST_MOVE : OK), bestValue; - - return *result = OK, value; -} - -} // namespace - - -// Called at startup and after every change to -// "SyzygyPath" UCI option to (re)create the various tables. It is not thread -// safe, nor it needs to be. 
-void Tablebases::init(const std::string& paths) { - - TBTables.clear(); - MaxCardinality = 0; - TBFile::Paths = paths; - - if (paths.empty()) - return; - - // MapB1H1H7[] encodes a square below a1-h8 diagonal to 0..27 - int code = 0; - for (Square s = SQ_A1; s <= SQ_H8; ++s) - if (off_A1H8(s) < 0) - MapB1H1H7[s] = code++; - - // MapA1D1D4[] encodes a square in the a1-d1-d4 triangle to 0..9 - std::vector diagonal; - code = 0; - for (Square s = SQ_A1; s <= SQ_D4; ++s) - if (off_A1H8(s) < 0 && file_of(s) <= FILE_D) - MapA1D1D4[s] = code++; - - else if (!off_A1H8(s) && file_of(s) <= FILE_D) - diagonal.push_back(s); - - // Diagonal squares are encoded as last ones - for (auto s : diagonal) - MapA1D1D4[s] = code++; - - // MapKK[] encodes all the 462 possible legal positions of two kings where - // the first is in the a1-d1-d4 triangle. If the first king is on the a1-d4 - // diagonal, the other one shall not be above the a1-h8 diagonal. - std::vector> bothOnDiagonal; - code = 0; - for (int idx = 0; idx < 10; idx++) - for (Square s1 = SQ_A1; s1 <= SQ_D4; ++s1) - if (MapA1D1D4[s1] == idx && (idx || s1 == SQ_B1)) // SQ_B1 is mapped to 0 - { - for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) - if ((PseudoAttacks[KING][s1] | s1) & s2) - continue; // Illegal position - - else if (!off_A1H8(s1) && off_A1H8(s2) > 0) - continue; // First on diagonal, second above - - else if (!off_A1H8(s1) && !off_A1H8(s2)) - bothOnDiagonal.emplace_back(idx, s2); - - else - MapKK[idx][s2] = code++; - } - - // Legal positions with both kings on a diagonal are encoded as last ones - for (auto p : bothOnDiagonal) - MapKK[p.first][p.second] = code++; - - // Binomial[] stores the Binomial Coefficients using Pascal rule. There - // are Binomial[k][n] ways to choose k elements from a set of n elements. - Binomial[0][0] = 1; - - for (int n = 1; n < 64; n++) // Squares - for (int k = 0; k < 6 && k <= n; ++k) // Pieces - Binomial[k][n] = - (k > 0 ? Binomial[k - 1][n - 1] : 0) + (k < n ? 
Binomial[k][n - 1] : 0); - - // MapPawns[s] encodes squares a2-h7 to 0..47. This is the number of possible - // available squares when the leading one is in 's'. Moreover the pawn with - // highest MapPawns[] is the leading pawn, the one nearest the edge, and - // among pawns with the same file, the one with the lowest rank. - int availableSquares = 47; // Available squares when lead pawn is in a2 - - // Init the tables for the encoding of leading pawns group: with 7-men TB we - // can have up to 5 leading pawns (KPPPPPK). - for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt) - for (File f = FILE_A; f <= FILE_D; ++f) - { - // Restart the index at every file because TB table is split - // by file, so we can reuse the same index for different files. - int idx = 0; - - // Sum all possible combinations for a given file, starting with - // the leading pawn on rank 2 and increasing the rank. - for (Rank r = RANK_2; r <= RANK_7; ++r) - { - Square sq = make_square(f, r); - - // Compute MapPawns[] at first pass. - // If sq is the leading pawn square, any other pawn cannot be - // below or more toward the edge of sq. 
There are 47 available - // squares when sq = a2 and reduced by 2 for any rank increase - // due to mirroring: sq == a3 -> no a2, h2, so MapPawns[a3] = 45 - if (leadPawnsCnt == 1) - { - MapPawns[sq] = availableSquares--; - MapPawns[flip_file(sq)] = availableSquares--; - } - LeadPawnIdx[leadPawnsCnt][sq] = idx; - idx += Binomial[leadPawnsCnt - 1][MapPawns[sq]]; - } - // After a file is traversed, store the cumulated per-file index - LeadPawnsSize[leadPawnsCnt][f] = idx; - } - - // Add entries in TB tables if the corresponding ".rtbw" file exists - for (PieceType p1 = PAWN; p1 < KING; ++p1) - { - TBTables.add({KING, p1, KING}); - - for (PieceType p2 = PAWN; p2 <= p1; ++p2) - { - TBTables.add({KING, p1, p2, KING}); - TBTables.add({KING, p1, KING, p2}); - - for (PieceType p3 = PAWN; p3 < KING; ++p3) - TBTables.add({KING, p1, p2, KING, p3}); - - for (PieceType p3 = PAWN; p3 <= p2; ++p3) - { - TBTables.add({KING, p1, p2, p3, KING}); - - for (PieceType p4 = PAWN; p4 <= p3; ++p4) - { - TBTables.add({KING, p1, p2, p3, p4, KING}); - - for (PieceType p5 = PAWN; p5 <= p4; ++p5) - TBTables.add({KING, p1, p2, p3, p4, p5, KING}); - - for (PieceType p5 = PAWN; p5 < KING; ++p5) - TBTables.add({KING, p1, p2, p3, p4, KING, p5}); - } - - for (PieceType p4 = PAWN; p4 < KING; ++p4) - { - TBTables.add({KING, p1, p2, p3, KING, p4}); - - for (PieceType p5 = PAWN; p5 <= p4; ++p5) - TBTables.add({KING, p1, p2, p3, KING, p4, p5}); - } - } - - for (PieceType p3 = PAWN; p3 <= p1; ++p3) - for (PieceType p4 = PAWN; p4 <= (p1 == p3 ? p2 : p3); ++p4) - TBTables.add({KING, p1, p2, KING, p3, p4}); - } - } - - TBTables.info(); -} - -// Probe the WDL table for a particular position. -// If *result != FAIL, the probe was successful. 
-// The return value is from the point of view of the side to move: -// -2 : loss -// -1 : loss, but draw under 50-move rule -// 0 : draw -// 1 : win, but draw under 50-move rule -// 2 : win -WDLScore Tablebases::probe_wdl(Position& pos, ProbeState* result) { - - *result = OK; - return search(pos, result); -} - -// Probe the DTZ table for a particular position. -// If *result != FAIL, the probe was successful. -// The return value is from the point of view of the side to move: -// n < -100 : loss, but draw under 50-move rule -// -100 <= n < -1 : loss in n ply (assuming 50-move counter == 0) -// -1 : loss, the side to move is mated -// 0 : draw -// 1 < n <= 100 : win in n ply (assuming 50-move counter == 0) -// 100 < n : win, but draw under 50-move rule -// -// The return value n can be off by 1: a return value -n can mean a loss -// in n+1 ply and a return value +n can mean a win in n+1 ply. This -// cannot happen for tables with positions exactly on the "edge" of -// the 50-move rule. -// -// This implies that if dtz > 0 is returned, the position is certainly -// a win if dtz + 50-move-counter <= 99. Care must be taken that the engine -// picks moves that preserve dtz + 50-move-counter <= 99. -// -// If n = 100 immediately after a capture or pawn move, then the position -// is also certainly a win, and during the whole phase until the next -// capture or pawn move, the inequality to be preserved is -// dtz + 50-move-counter <= 100. -// -// In short, if a move is available resulting in dtz + 50-move-counter <= 99, -// then do not accept moves leading to dtz + 50-move-counter == 100. -int Tablebases::probe_dtz(Position& pos, ProbeState* result) { - - *result = OK; - WDLScore wdl = search(pos, result); - - if (*result == FAIL || wdl == WDLDraw) // DTZ tables don't store draws - return 0; - - // DTZ stores a 'don't care value in this case, or even a plain wrong - // one as in case the best move is a losing ep, so it cannot be probed. 
- if (*result == ZEROING_BEST_MOVE) - return dtz_before_zeroing(wdl); - - int dtz = probe_table(pos, result, wdl); - - if (*result == FAIL) - return 0; - - if (*result != CHANGE_STM) - return (dtz + 100 * (wdl == WDLBlessedLoss || wdl == WDLCursedWin)) * sign_of(wdl); - - // DTZ stores results for the other side, so we need to do a 1-ply search and - // find the winning move that minimizes DTZ. - StateInfo st; - int minDTZ = 0xFFFF; - - for (const Move move : MoveList(pos)) - { - bool zeroing = pos.capture(move) || type_of(pos.moved_piece(move)) == PAWN; - - pos.do_move(move, st); - - // For zeroing moves we want the dtz of the move _before_ doing it, - // otherwise we will get the dtz of the next move sequence. Search the - // position after the move to get the score sign (because even in a - // winning position we could make a losing capture or go for a draw). - dtz = zeroing ? -dtz_before_zeroing(search(pos, result)) : -probe_dtz(pos, result); - - // If the move mates, force minDTZ to 1 - if (dtz == 1 && pos.checkers() && MoveList(pos).size() == 0) - minDTZ = 1; - - // Convert result from 1-ply search. Zeroing moves are already accounted - // by dtz_before_zeroing() that returns the DTZ of the previous move. - if (!zeroing) - dtz += sign_of(dtz); - - // Skip the draws and if we are winning only pick positive dtz - if (dtz < minDTZ && sign_of(dtz) == sign_of(wdl)) - minDTZ = dtz; - - pos.undo_move(move); - - if (*result == FAIL) - return 0; - } - - // When there are no legal moves, the position is mate: we return -1 - return minDTZ == 0xFFFF ? -1 : minDTZ; -} - - -// Use the DTZ tables to rank root moves. -// -// A return value false indicates that not all probes were successful. 
-bool Tablebases::root_probe(Position& pos, - Search::RootMoves& rootMoves, - bool rule50, - bool rankDTZ, - const std::function& time_abort) { - - ProbeState result = OK; - StateInfo st; - - // Obtain 50-move counter for the root position - int cnt50 = pos.rule50_count(); - - // Check whether a position was repeated since the last zeroing move. - bool rep = pos.has_repeated(); - - int dtz, bound = rule50 ? (MAX_DTZ / 2 - 100) : 1; - - // Probe and rank each move - for (auto& m : rootMoves) - { - pos.do_move(m.pv[0], st); - - // Calculate dtz for the current move counting from the root position - if (pos.rule50_count() == 0) - { - // In case of a zeroing move, dtz is one of -101/-1/0/1/101 - WDLScore wdl = -probe_wdl(pos, &result); - dtz = dtz_before_zeroing(wdl); - } - else if ((rule50 && pos.is_draw(1)) || pos.is_repetition(1)) - { - // In case a root move leads to a draw by repetition or 50-move rule, - // we set dtz to zero. Note: since we are only 1 ply from the root, - // this must be a true 3-fold repetition inside the game history. - dtz = 0; - } - else - { - // Otherwise, take dtz for the new position and correct by 1 ply - dtz = -probe_dtz(pos, &result); - dtz = dtz > 0 ? dtz + 1 : dtz < 0 ? dtz - 1 : dtz; - } - - // Make sure that a mating move is assigned a dtz value of 1 - if (pos.checkers() && dtz == 2 && MoveList(pos).size() == 0) - dtz = 1; - - pos.undo_move(m.pv[0]); - - if (time_abort() || result == FAIL) - return false; - - // Better moves are ranked higher. Certain wins are ranked equally. - // Losing moves are ranked equally unless a 50-move draw is in sight. - int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ - (rankDTZ ? dtz : 0) - : MAX_DTZ / 2 - (dtz + cnt50)) - : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ - (rankDTZ ? dtz : 0) - : -MAX_DTZ / 2 + (-dtz + cnt50)) - : 0; - m.tbRank = r; - - // Determine the score to be displayed for this move. 
Assign at least - // 1 cp to cursed wins and let it grow to 49 cp as the positions gets - // closer to a real win. - m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1 - : r > 0 ? Value((std::max(3, r - (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200) - : r == 0 ? VALUE_DRAW - : r > -bound - ? Value((std::min(-3, r + (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200) - : -VALUE_MATE + MAX_PLY + 1; - } - - return true; -} - - -// Use the WDL tables to rank root moves. -// This is a fallback for the case that some or all DTZ tables are missing. -// -// A return value false indicates that not all probes were successful. -bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50) { - - static const int WDL_to_rank[] = {-MAX_DTZ, -MAX_DTZ + 101, 0, MAX_DTZ - 101, MAX_DTZ}; - - ProbeState result = OK; - StateInfo st; - WDLScore wdl; - - - // Probe and rank each move - for (auto& m : rootMoves) - { - pos.do_move(m.pv[0], st); - - if (pos.is_draw(1)) - wdl = WDLDraw; - else - wdl = -probe_wdl(pos, &result); - - pos.undo_move(m.pv[0]); - - if (result == FAIL) - return false; - - m.tbRank = WDL_to_rank[wdl + 2]; - - if (!rule50) - wdl = wdl > WDLDraw ? WDLWin : wdl < WDLDraw ? 
WDLLoss : WDLDraw; - m.tbScore = WDL_to_value[wdl + 2]; - } - - return true; -} - -Config Tablebases::rank_root_moves(const OptionsMap& options, - Position& pos, - Search::RootMoves& rootMoves, - bool rankDTZ, - const std::function& time_abort) { - Config config; - - if (rootMoves.empty()) - return config; - - config.rootInTB = false; - config.useRule50 = bool(options["Syzygy50MoveRule"]); - config.probeDepth = int(options["SyzygyProbeDepth"]); - config.cardinality = int(options["SyzygyProbeLimit"]); - - bool dtz_available = true; - - // Tables with fewer pieces than SyzygyProbeLimit are searched with - // probeDepth == DEPTH_ZERO - if (config.cardinality > MaxCardinality) - { - config.cardinality = MaxCardinality; - config.probeDepth = 0; - } - - if (config.cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) - { - // Rank moves using DTZ tables, bail out if time_abort flags zeitnot - config.rootInTB = - root_probe(pos, rootMoves, options["Syzygy50MoveRule"], rankDTZ, time_abort); - - if (!config.rootInTB && !time_abort()) - { - // DTZ tables are missing; try to rank moves using WDL tables - dtz_available = false; - config.rootInTB = root_probe_wdl(pos, rootMoves, options["Syzygy50MoveRule"]); - } - } - - if (config.rootInTB) - { - // Sort moves according to TB rank - std::stable_sort( - rootMoves.begin(), rootMoves.end(), - [](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; }); - - // Probe during search only if DTZ is not available and we are winning - if (dtz_available || rootMoves[0].tbScore <= VALUE_DRAW) - config.cardinality = 0; - } - else - { - // Clean up if root_probe() and root_probe_wdl() have failed - for (auto& m : rootMoves) - m.tbRank = 0; - } - - return config; -} -} // namespace Stockfish diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h deleted file mode 100644 index 7b60d6e20546c221c5e1a4f26c5b32bf04396115..0000000000000000000000000000000000000000 --- a/src/syzygy/tbprobe.h +++ 
/dev/null @@ -1,85 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef TBPROBE_H -#define TBPROBE_H - -#include -#include -#include - - -namespace Stockfish { -class Position; -class OptionsMap; - -using Depth = int; - -namespace Search { -struct RootMove; -using RootMoves = std::vector; -} -} - -namespace Stockfish::Tablebases { - -struct Config { - int cardinality = 0; - bool rootInTB = false; - bool useRule50 = false; - Depth probeDepth = 0; -}; - -enum WDLScore { - WDLLoss = -2, // Loss - WDLBlessedLoss = -1, // Loss, but draw under 50-move rule - WDLDraw = 0, // Draw - WDLCursedWin = 1, // Win, but draw under 50-move rule - WDLWin = 2, // Win -}; - -// Possible states after a probing operation -enum ProbeState { - FAIL = 0, // Probe failed (missing file table) - OK = 1, // Probe successful - CHANGE_STM = -1, // DTZ should check the other side - ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) -}; - -extern int MaxCardinality; - - -void init(const std::string& paths); -WDLScore probe_wdl(Position& pos, ProbeState* result); -int probe_dtz(Position& pos, ProbeState* result); -bool root_probe(Position& pos, - Search::RootMoves& rootMoves, - bool rule50, - bool rankDTZ, - const std::function& time_abort); -bool root_probe_wdl(Position& pos, 
Search::RootMoves& rootMoves, bool rule50); -Config rank_root_moves( - const OptionsMap& options, - Position& pos, - Search::RootMoves& rootMoves, - bool rankDTZ = false, - const std::function& time_abort = []() { return false; }); - -} // namespace Stockfish::Tablebases - -#endif diff --git a/src/thread.cpp b/src/thread.cpp deleted file mode 100644 index a2f59d5b100f787c0c8abf5efab51bd775f4a93c..0000000000000000000000000000000000000000 --- a/src/thread.cpp +++ /dev/null @@ -1,456 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "thread.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bitboard.h" -#include "history.h" -#include "memory.h" -#include "movegen.h" -#include "search.h" -#include "syzygy/tbprobe.h" -#include "timeman.h" -#include "types.h" -#include "uci.h" -#include "ucioption.h" - -namespace Stockfish { - -// Constructor launches the thread and waits until it goes to sleep -// in idle_loop(). Note that 'searching' and 'exit' should be already set. 
-Thread::Thread(Search::SharedState& sharedState, - std::unique_ptr sm, - size_t n, - size_t numaN, - size_t totalNumaCount, - OptionalThreadToNumaNodeBinder binder) : - idx(n), - idxInNuma(numaN), - totalNuma(totalNumaCount), - nthreads(sharedState.options["Threads"]), - stdThread(&Thread::idle_loop, this) { - - wait_for_search_finished(); - - run_custom_job([this, &binder, &sharedState, &sm, n]() { - // Use the binder to [maybe] bind the threads to a NUMA node before doing - // the Worker allocation. Ideally we would also allocate the SearchManager - // here, but that's minor. - this->numaAccessToken = binder(); - this->worker = make_unique_large_page( - sharedState, std::move(sm), n, idxInNuma, totalNuma, this->numaAccessToken); - }); - - wait_for_search_finished(); -} - - -// Destructor wakes up the thread in idle_loop() and waits -// for its termination. Thread should be already waiting. -Thread::~Thread() { - - assert(!searching); - - exit = true; - start_searching(); - stdThread.join(); -} - -// Wakes up the thread that will start the search -void Thread::start_searching() { - assert(worker != nullptr); - run_custom_job([this]() { worker->start_searching(); }); -} - -// Clears the histories for the thread worker (usually before a new game) -void Thread::clear_worker() { - assert(worker != nullptr); - run_custom_job([this]() { worker->clear(); }); -} - -// Blocks on the condition variable until the thread has finished searching -void Thread::wait_for_search_finished() { - - std::unique_lock lk(mutex); - cv.wait(lk, [&] { return !searching; }); -} - -// Launching a function in the thread -void Thread::run_custom_job(std::function f) { - { - std::unique_lock lk(mutex); - cv.wait(lk, [&] { return !searching; }); - jobFunc = std::move(f); - searching = true; - } - cv.notify_one(); -} - -void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); } - -// Thread gets parked here, blocked on the condition variable -// when the thread has no work 
to do. - -void Thread::idle_loop() { - while (true) - { - std::unique_lock lk(mutex); - searching = false; - cv.notify_one(); // Wake up anyone waiting for search finished - cv.wait(lk, [&] { return searching; }); - - if (exit) - return; - - std::function job = std::move(jobFunc); - jobFunc = nullptr; - - lk.unlock(); - - if (job) - job(); - } -} - -Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); } - -uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); } -uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); } - -static size_t next_power_of_two(uint64_t count) { return count > 1 ? (2ULL << msb(count - 1)) : 1; } - -// Creates/destroys threads to match the requested number. -// Created and launched threads will immediately go to sleep in idle_loop. -// Upon resizing, threads are recreated to allow for binding if necessary. -void ThreadPool::set(const NumaConfig& numaConfig, - Search::SharedState sharedState, - const Search::SearchManager::UpdateContext& updateContext) { - - if (threads.size() > 0) // destroy any existing thread(s) - { - main_thread()->wait_for_search_finished(); - - threads.clear(); - - boundThreadToNumaNode.clear(); - } - - const size_t requested = sharedState.options["Threads"]; - - if (requested > 0) // create new thread(s) - { - // Binding threads may be problematic when there's multiple NUMA nodes and - // multiple Stockfish instances running. In particular, if each instance - // runs a single thread then they would all be mapped to the first NUMA node. - // This is undesirable, and so the default behaviour (i.e. when the user does not - // change the NumaConfig UCI setting) is to not bind the threads to processors - // unless we know for sure that we span NUMA nodes and replication is required. 
- const std::string numaPolicy(sharedState.options["NumaPolicy"]); - const bool doBindThreads = [&]() { - if (numaPolicy == "none") - return false; - - if (numaPolicy == "auto") - return numaConfig.suggests_binding_threads(requested); - - // numaPolicy == "system", or explicitly set by the user - return true; - }(); - - std::map counts; - boundThreadToNumaNode = doBindThreads - ? numaConfig.distribute_threads_among_numa_nodes(requested) - : std::vector{}; - - if (boundThreadToNumaNode.empty()) - counts[0] = requested; // Pretend all threads are part of numa node 0 - else - { - for (size_t i = 0; i < boundThreadToNumaNode.size(); ++i) - counts[boundThreadToNumaNode[i]]++; - } - - sharedState.sharedHistories.clear(); - for (auto pair : counts) - { - NumaIndex numaIndex = pair.first; - uint64_t count = pair.second; - auto f = [&]() { - sharedState.sharedHistories.try_emplace(numaIndex, next_power_of_two(count)); - }; - if (doBindThreads) - numaConfig.execute_on_numa_node(numaIndex, f); - else - f(); - } - - auto threadsPerNode = counts; - counts.clear(); - - while (threads.size() < requested) - { - const size_t threadId = threads.size(); - const NumaIndex numaId = doBindThreads ? boundThreadToNumaNode[threadId] : 0; - auto create_thread = [&]() { - auto manager = threadId == 0 - ? std::unique_ptr( - std::make_unique(updateContext)) - : std::make_unique(); - - // When not binding threads we want to force all access to happen - // from the same NUMA node, because in case of NUMA replicated memory - // accesses we don't want to trash cache in case the threads get scheduled - // on the same NUMA node. - auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId) - : OptionalThreadToNumaNodeBinder(numaId); - - threads.emplace_back(std::make_unique(sharedState, std::move(manager), - threadId, counts[numaId]++, - threadsPerNode[numaId], binder)); - }; - - // Ensure the worker thread inherits the intended NUMA affinity at creation. 
- if (doBindThreads) - numaConfig.execute_on_numa_node(numaId, create_thread); - else - create_thread(); - } - - clear(); - - main_thread()->wait_for_search_finished(); - } -} - - -// Sets threadPool data to initial values -void ThreadPool::clear() { - if (threads.size() == 0) - return; - - for (auto&& th : threads) - th->clear_worker(); - - for (auto&& th : threads) - th->wait_for_search_finished(); - - // These two affect the time taken on the first move of a game: - main_manager()->bestPreviousAverageScore = VALUE_INFINITE; - main_manager()->previousTimeReduction = 0.85; - - main_manager()->callsCnt = 0; - main_manager()->bestPreviousScore = VALUE_INFINITE; - main_manager()->originalTimeAdjust = -1; - main_manager()->tm.clear(); -} - -void ThreadPool::run_on_thread(size_t threadId, std::function f) { - assert(threads.size() > threadId); - threads[threadId]->run_custom_job(std::move(f)); -} - -void ThreadPool::wait_on_thread(size_t threadId) { - assert(threads.size() > threadId); - threads[threadId]->wait_for_search_finished(); -} - -size_t ThreadPool::num_threads() const { return threads.size(); } - - -// Wakes up main thread waiting in idle_loop() and returns immediately. -// Main thread will wake up other threads and start the search. 
-void ThreadPool::start_thinking(const OptionsMap& options, - Position& pos, - StateListPtr& states, - Search::LimitsType limits) { - - main_thread()->wait_for_search_finished(); - - main_manager()->stopOnPonderhit = stop = false; - main_manager()->ponder = limits.ponderMode; - - increaseDepth = true; - - Search::RootMoves rootMoves; - const auto legalmoves = MoveList(pos); - - for (const auto& uciMove : limits.searchmoves) - { - auto move = UCIEngine::to_move(pos, uciMove); - - if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end()) - rootMoves.emplace_back(move); - } - - if (rootMoves.empty()) - for (const auto& m : legalmoves) - rootMoves.emplace_back(m); - - Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves); - - // After ownership transfer 'states' becomes empty, so if we stop the search - // and call 'go' again without setting a new position states.get() == nullptr. - assert(states.get() || setupStates.get()); - - if (states.get()) - setupStates = std::move(states); // Ownership transfer, states is now empty - - // We use Position::set() to set root position across threads. But there are - // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot - // be deduced from a fen string, so set() clears them and they are set from - // setupStates->back() later. The rootState is per thread, earlier states are - // shared since they are read-only. 
- for (auto&& th : threads) - { - th->run_custom_job([&]() { - th->worker->limits = limits; - th->worker->nodes = th->worker->tbHits = th->worker->bestMoveChanges = 0; - th->worker->nmpMinPly = 0; - th->worker->rootDepth = th->worker->completedDepth = 0; - th->worker->rootMoves = rootMoves; - th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState); - th->worker->rootState = setupStates->back(); - th->worker->tbConfig = tbConfig; - }); - } - - for (auto&& th : threads) - th->wait_for_search_finished(); - - main_thread()->start_searching(); -} - -Thread* ThreadPool::get_best_thread() const { - - Thread* bestThread = threads.front().get(); - Value minScore = VALUE_NONE; - - std::unordered_map votes( - 2 * std::min(size(), bestThread->worker->rootMoves.size())); - - // Find the minimum score of all threads - for (auto&& th : threads) - minScore = std::min(minScore, th->worker->rootMoves[0].score); - - // Vote according to score and depth, and select the best thread - auto thread_voting_value = [minScore](Thread* th) { - return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth); - }; - - for (auto&& th : threads) - votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get()); - - for (auto&& th : threads) - { - const auto bestThreadScore = bestThread->worker->rootMoves[0].score; - const auto newThreadScore = th->worker->rootMoves[0].score; - - const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv; - const auto& newThreadPV = th->worker->rootMoves[0].pv; - - const auto bestThreadMoveVote = votes[bestThreadPV[0]]; - const auto newThreadMoveVote = votes[newThreadPV[0]]; - - const bool bestThreadInProvenWin = is_win(bestThreadScore); - const bool newThreadInProvenWin = is_win(newThreadScore); - - const bool bestThreadInProvenLoss = - bestThreadScore != -VALUE_INFINITE && is_loss(bestThreadScore); - const bool newThreadInProvenLoss = - newThreadScore != -VALUE_INFINITE && is_loss(newThreadScore); - - // 
We make sure not to pick a thread with truncated principal variation - const bool betterVotingValue = - thread_voting_value(th.get()) * int(newThreadPV.size() > 2) - > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2); - - if (bestThreadInProvenWin) - { - // Make sure we pick the shortest mate / TB conversion - if (newThreadScore > bestThreadScore) - bestThread = th.get(); - } - else if (bestThreadInProvenLoss) - { - // Make sure we pick the shortest mated / TB conversion - if (newThreadInProvenLoss && newThreadScore < bestThreadScore) - bestThread = th.get(); - } - else if (newThreadInProvenWin || newThreadInProvenLoss - || (!is_loss(newThreadScore) - && (newThreadMoveVote > bestThreadMoveVote - || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue)))) - bestThread = th.get(); - } - - return bestThread; -} - - -// Start non-main threads. -// Will be invoked by main thread after it has started searching. -void ThreadPool::start_searching() { - - for (auto&& th : threads) - if (th != threads.front()) - th->start_searching(); -} - - -// Wait for non-main threads -void ThreadPool::wait_for_search_finished() const { - - for (auto&& th : threads) - if (th != threads.front()) - th->wait_for_search_finished(); -} - -std::vector ThreadPool::get_bound_thread_count_by_numa_node() const { - std::vector counts; - - if (!boundThreadToNumaNode.empty()) - { - NumaIndex highestNumaNode = 0; - for (NumaIndex n : boundThreadToNumaNode) - if (n > highestNumaNode) - highestNumaNode = n; - - counts.resize(highestNumaNode + 1, 0); - - for (NumaIndex n : boundThreadToNumaNode) - counts[n] += 1; - } - - return counts; -} - -void ThreadPool::ensure_network_replicated() { - for (auto&& th : threads) - th->ensure_network_replicated(); -} - -} // namespace Stockfish diff --git a/src/thread.h b/src/thread.h deleted file mode 100644 index d6032d295027dae03929ea8c21a3963158887f8e..0000000000000000000000000000000000000000 --- a/src/thread.h +++ /dev/null @@ -1,181 +0,0 @@ 
-/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef THREAD_H_INCLUDED -#define THREAD_H_INCLUDED - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "memory.h" -#include "numa.h" -#include "position.h" -#include "search.h" -#include "thread_win32_osx.h" - -namespace Stockfish { - - -class OptionsMap; -using Value = int; - -// Sometimes we don't want to actually bind the threads, but the recipient still -// needs to think it runs on *some* NUMA node, such that it can access structures -// that rely on NUMA node knowledge. This class encapsulates this optional process -// such that the recipient does not need to know whether the binding happened or not. -class OptionalThreadToNumaNodeBinder { - public: - OptionalThreadToNumaNodeBinder(NumaIndex n) : - numaConfig(nullptr), - numaId(n) {} - - OptionalThreadToNumaNodeBinder(const NumaConfig& cfg, NumaIndex n) : - numaConfig(&cfg), - numaId(n) {} - - NumaReplicatedAccessToken operator()() const { - if (numaConfig != nullptr) - return numaConfig->bind_current_thread_to_numa_node(numaId); - else - return NumaReplicatedAccessToken(numaId); - } - - private: - const NumaConfig* numaConfig; - NumaIndex numaId; -}; - -// Abstraction of a thread. 
It contains a pointer to the worker and a native thread. -// After construction, the native thread is started with idle_loop() -// waiting for a signal to start searching. -// When the signal is received, the thread starts searching and when -// the search is finished, it goes back to idle_loop() waiting for a new signal. -class Thread { - public: - Thread(Search::SharedState&, - std::unique_ptr, - size_t, - size_t, - size_t, - OptionalThreadToNumaNodeBinder); - virtual ~Thread(); - - void idle_loop(); - void start_searching(); - void clear_worker(); - void run_custom_job(std::function f); - - void ensure_network_replicated(); - - // Thread has been slightly altered to allow running custom jobs, so - // this name is no longer correct. However, this class (and ThreadPool) - // require further work to make them properly generic while maintaining - // appropriate specificity regarding search, from the point of view of an - // outside user, so renaming of this function is left for whenever that happens. - void wait_for_search_finished(); - size_t id() const { return idx; } - - LargePagePtr worker; - std::function jobFunc; - - private: - std::mutex mutex; - std::condition_variable cv; - size_t idx, idxInNuma, totalNuma, nthreads; - bool exit = false, searching = true; // Set before starting std::thread - NativeThread stdThread; - NumaReplicatedAccessToken numaAccessToken; -}; - - -// ThreadPool struct handles all the threads-related stuff like init, starting, -// parking and, most importantly, launching a thread. All the access to threads -// is done through this class. 
-class ThreadPool { - public: - ThreadPool() {} - - ~ThreadPool() { - // destroy any existing thread(s) - if (threads.size() > 0) - { - main_thread()->wait_for_search_finished(); - - threads.clear(); - } - } - - ThreadPool(const ThreadPool&) = delete; - ThreadPool(ThreadPool&&) = delete; - - ThreadPool& operator=(const ThreadPool&) = delete; - ThreadPool& operator=(ThreadPool&&) = delete; - - void start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType); - void run_on_thread(size_t threadId, std::function f); - void wait_on_thread(size_t threadId); - size_t num_threads() const; - void clear(); - void set(const NumaConfig& numaConfig, - Search::SharedState, - const Search::SearchManager::UpdateContext&); - - Search::SearchManager* main_manager(); - Thread* main_thread() const { return threads.front().get(); } - uint64_t nodes_searched() const; - uint64_t tb_hits() const; - Thread* get_best_thread() const; - void start_searching(); - void wait_for_search_finished() const; - - std::vector get_bound_thread_count_by_numa_node() const; - - void ensure_network_replicated(); - - std::atomic_bool stop, increaseDepth; - - auto cbegin() const noexcept { return threads.cbegin(); } - auto begin() noexcept { return threads.begin(); } - auto end() noexcept { return threads.end(); } - auto cend() const noexcept { return threads.cend(); } - auto size() const noexcept { return threads.size(); } - auto empty() const noexcept { return threads.empty(); } - - private: - StateListPtr setupStates; - std::vector> threads; - std::vector boundThreadToNumaNode; - - uint64_t accumulate(std::atomic Search::Worker::* member) const { - - uint64_t sum = 0; - for (auto&& th : threads) - sum += (th->worker.get()->*member).load(std::memory_order_relaxed); - return sum; - } -}; - -} // namespace Stockfish - -#endif // #ifndef THREAD_H_INCLUDED diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h deleted file mode 100644 index 
5a8d43a2e7718ed8e6541c212ca6c7b3d6f59cfe..0000000000000000000000000000000000000000 --- a/src/thread_win32_osx.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef THREAD_WIN32_OSX_H_INCLUDED -#define THREAD_WIN32_OSX_H_INCLUDED - -#include - -// On OSX threads other than the main thread are created with a reduced stack -// size of 512KB by default, this is too low for deep searches, which require -// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE. -// The implementation calls pthread_create() with the stack size parameter -// equal to the Linux 8MB default, on platforms that support it. - -#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS) - - #include - #include - -namespace Stockfish { - -class NativeThread { - pthread_t thread; - - static constexpr size_t TH_STACK_SIZE = 8 * 1024 * 1024; - - public: - template - explicit NativeThread(Function&& fun, Args&&... 
args) { - auto func = new std::function( - std::bind(std::forward(fun), std::forward(args)...)); - - pthread_attr_t attr_storage, *attr = &attr_storage; - pthread_attr_init(attr); - pthread_attr_setstacksize(attr, TH_STACK_SIZE); - - auto start_routine = [](void* ptr) -> void* { - auto f = reinterpret_cast*>(ptr); - // Call the function - (*f)(); - delete f; - return nullptr; - }; - - pthread_create(&thread, attr, start_routine, func); - } - - void join() { pthread_join(thread, nullptr); } -}; - -} // namespace Stockfish - -#else // Default case: use STL classes - -namespace Stockfish { - -using NativeThread = std::thread; - -} // namespace Stockfish - -#endif - -#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED diff --git a/src/timeman.cpp b/src/timeman.cpp deleted file mode 100644 index 4e98081bc2baf4131604322f4d04df3a744c755f..0000000000000000000000000000000000000000 --- a/src/timeman.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include "timeman.h" - -#include -#include -#include -#include - -#include "search.h" -#include "ucioption.h" - -namespace Stockfish { - -TimePoint TimeManagement::optimum() const { return optimumTime; } -TimePoint TimeManagement::maximum() const { return maximumTime; } - -void TimeManagement::clear() { - availableNodes = -1; // When in 'nodes as time' mode -} - -void TimeManagement::advance_nodes_time(std::int64_t nodes) { - assert(useNodesTime); - availableNodes = std::max(int64_t(0), availableNodes - nodes); -} - -// Called at the beginning of the search and calculates -// the bounds of time allowed for the current game ply. We currently support: -// 1) x basetime (+ z increment) -// 2) x moves in y seconds (+ z increment) -void TimeManagement::init(Search::LimitsType& limits, - Color us, - int ply, - const OptionsMap& options, - double& originalTimeAdjust) { - TimePoint npmsec = TimePoint(options["nodestime"]); - - // If we have no time, we don't need to fully initialize TM. - // startTime is used by movetime and useNodesTime is used in elapsed calls. - startTime = limits.startTime; - useNodesTime = npmsec != 0; - - if (limits.time[us] == 0) - return; - - TimePoint moveOverhead = TimePoint(options["Move Overhead"]); - - // optScale is a percentage of available time to use for the current move. - // maxScale is a multiplier applied to optimumTime. - double optScale, maxScale; - - // If we have to play in 'nodes as time' mode, then convert from time - // to nodes, and use resulting values in time management formulas. - // WARNING: to avoid time losses, the given npmsec (nodes per millisecond) - // must be much lower than the real engine speed. 
- if (useNodesTime) - { - if (availableNodes == -1) // Only once at game start - availableNodes = npmsec * limits.time[us]; // Time is in msec - - // Convert from milliseconds to nodes - limits.time[us] = TimePoint(availableNodes); - limits.inc[us] *= npmsec; - limits.npmsec = npmsec; - moveOverhead *= npmsec; - } - - // These numbers are used where multiplications, divisions or comparisons - // with constants are involved. - const int64_t scaleFactor = useNodesTime ? npmsec : 1; - const TimePoint scaledTime = limits.time[us] / scaleFactor; - - // Maximum move horizon - int centiMTG = limits.movestogo ? std::min(limits.movestogo * 100, 5000) : 5051; - - // If less than one second, gradually reduce mtg - if (scaledTime < 1000) - centiMTG = int(scaledTime * 5.051); - - // Make sure timeLeft is > 0 since we may use it as a divisor - TimePoint timeLeft = - std::max(TimePoint(1), - limits.time[us] - + (limits.inc[us] * (centiMTG - 100) - moveOverhead * (200 + centiMTG)) / 100); - - // x basetime (+ z increment) - // If there is a healthy increment, timeLeft can exceed the actual available - // game time for the current move, so also cap to a percentage of available game time. - if (limits.movestogo == 0) - { - // Extra time according to timeLeft - if (originalTimeAdjust < 0) - originalTimeAdjust = 0.3128 * std::log10(timeLeft) - 0.4354; - - // Calculate time constants based on current time left. 
- double logTimeInSec = std::log10(scaledTime / 1000.0); - double optConstant = std::min(0.0032116 + 0.000321123 * logTimeInSec, 0.00508017); - double maxConstant = std::max(3.3977 + 3.03950 * logTimeInSec, 2.94761); - - optScale = std::min(0.0121431 + std::pow(ply + 2.94693, 0.461073) * optConstant, - 0.213035 * limits.time[us] / timeLeft) - * originalTimeAdjust; - - maxScale = std::min(6.67704, maxConstant + ply / 11.9847); - } - - // x moves in y seconds (+ z increment) - else - { - optScale = - std::min((0.88 + ply / 116.4) / (centiMTG / 100.0), 0.88 * limits.time[us] / timeLeft); - maxScale = 1.3 + 0.11 * (centiMTG / 100.0); - } - - // Limit the maximum possible time for this move - optimumTime = TimePoint(optScale * timeLeft); - maximumTime = - TimePoint(std::min(0.825179 * limits.time[us] - moveOverhead, maxScale * optimumTime)) - 10; - - if (options["Ponder"]) - optimumTime += optimumTime / 4; -} - -} // namespace Stockfish diff --git a/src/timeman.h b/src/timeman.h deleted file mode 100644 index 08e8da10dcc7b5de65dbad53f43751cfc8a527fe..0000000000000000000000000000000000000000 --- a/src/timeman.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef TIMEMAN_H_INCLUDED -#define TIMEMAN_H_INCLUDED - -#include - -#include "misc.h" - -namespace Stockfish { - -class OptionsMap; -enum Color : uint8_t; - -namespace Search { -struct LimitsType; -} - -// The TimeManagement class computes the optimal time to think depending on -// the maximum available time, the game move number, and other parameters. -class TimeManagement { - public: - void init(Search::LimitsType& limits, - Color us, - int ply, - const OptionsMap& options, - double& originalTimeAdjust); - - TimePoint optimum() const; - TimePoint maximum() const; - template - TimePoint elapsed(FUNC nodes) const { - return useNodesTime ? TimePoint(nodes()) : elapsed_time(); - } - TimePoint elapsed_time() const { return now() - startTime; }; - - void clear(); - void advance_nodes_time(std::int64_t nodes); - - private: - TimePoint startTime; - TimePoint optimumTime; - TimePoint maximumTime; - - std::int64_t availableNodes = -1; // When in 'nodes as time' mode - bool useNodesTime = false; // True if we are in 'nodes as time' mode -}; - -} // namespace Stockfish - -#endif // #ifndef TIMEMAN_H_INCLUDED diff --git a/src/tt.cpp b/src/tt.cpp deleted file mode 100644 index ef602809f79a6e17112de220e9cf6b2a47b12df5..0000000000000000000000000000000000000000 --- a/src/tt.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "tt.h" - -#include -#include -#include -#include -#include - -#include "memory.h" -#include "misc.h" -#include "syzygy/tbprobe.h" -#include "thread.h" - -namespace Stockfish { - - -// TTEntry struct is the 10 bytes transposition table entry, defined as below: -// -// key 16 bit -// depth 8 bit -// generation 5 bit -// pv node 1 bit -// bound type 2 bit -// move 16 bit -// value 16 bit -// evaluation 16 bit -// -// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially. -// Equally, the store order in save() matches this order. - -struct TTEntry { - - // Convert internal bitfields to external types - TTData read() const { - return TTData{Move(move16), Value(value16), - Value(eval16), Depth(depth8 + DEPTH_ENTRY_OFFSET), - Bound(genBound8 & 0x3), bool(genBound8 & 0x4)}; - } - - bool is_occupied() const; - void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8); - // The returned age is a multiple of TranspositionTable::GENERATION_DELTA - uint8_t relative_age(const uint8_t generation8) const; - - private: - friend class TranspositionTable; - - uint16_t key16; - uint8_t depth8; - uint8_t genBound8; - Move move16; - int16_t value16; - int16_t eval16; -}; - -// `genBound8` is where most of the details are. We use the following constants to manipulate 5 leading generation bits -// and 3 trailing miscellaneous bits. - -// These bits are reserved for other things. 
-static constexpr unsigned GENERATION_BITS = 3; -// increment for generation field -static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS); -// cycle length -static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA; -// mask to pull out generation number -static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; - -// DEPTH_ENTRY_OFFSET exists because 1) we use `bool(depth8)` as the occupancy check, but -// 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits": -// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted in `save`.) -bool TTEntry::is_occupied() const { return bool(depth8); } - -// Populates the TTEntry with a new node's data, possibly -// overwriting an old position. The update is not atomic and can be racy. -void TTEntry::save( - Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) { - - // Preserve the old ttmove if we don't have a new one - if (m || uint16_t(k) != key16) - move16 = m; - - // Overwrite less valuable entries (cheapest checks first) - if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_ENTRY_OFFSET + 2 * pv > depth8 - 4 - || relative_age(generation8)) - { - assert(d > DEPTH_ENTRY_OFFSET); - assert(d < 256 + DEPTH_ENTRY_OFFSET); - - key16 = uint16_t(k); - depth8 = uint8_t(d - DEPTH_ENTRY_OFFSET); - genBound8 = uint8_t(generation8 | uint8_t(pv) << 2 | b); - value16 = int16_t(v); - eval16 = int16_t(ev); - } -} - - -uint8_t TTEntry::relative_age(const uint8_t generation8) const { - // Due to our packed storage format for generation and its cyclic - // nature we add GENERATION_CYCLE (256 is the modulus, plus what - // is needed to keep the unrelated lowest n bits from affecting - // the result) to calculate the entry age correctly even after - // generation8 overflows into the next cycle. 
- return (GENERATION_CYCLE + generation8 - genBound8) & GENERATION_MASK; -} - - -// TTWriter is but a very thin wrapper around the pointer -TTWriter::TTWriter(TTEntry* tte) : - entry(tte) {} - -void TTWriter::write( - Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) { - entry->save(k, v, pv, b, d, m, ev, generation8); -} - - -// A TranspositionTable is an array of Cluster, of size clusterCount. Each cluster consists of ClusterSize number -// of TTEntry. Each non-empty TTEntry contains information on exactly one position. The size of a Cluster should -// divide the size of a cache line for best performance, as the cacheline is prefetched when possible. - -static constexpr int ClusterSize = 3; - -struct Cluster { - TTEntry entry[ClusterSize]; - char padding[2]; // Pad to 32 bytes -}; - -static_assert(sizeof(Cluster) == 32, "Suboptimal Cluster size"); - - -// Sets the size of the transposition table, -// measured in megabytes. Transposition table consists -// of clusters and each cluster consists of ClusterSize number of TTEntry. -void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) { - aligned_large_pages_free(table); - - clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - - table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); - - if (!table) - { - std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl; - exit(EXIT_FAILURE); - } - - clear(threads); -} - - -// Initializes the entire transposition table to zero, -// in a multi-threaded way. -void TranspositionTable::clear(ThreadPool& threads) { - generation8 = 0; - const size_t threadCount = threads.num_threads(); - - for (size_t i = 0; i < threadCount; ++i) - { - threads.run_on_thread(i, [this, i, threadCount]() { - // Each thread will zero its part of the hash table - const size_t stride = clusterCount / threadCount; - const size_t start = stride * i; - const size_t len = i + 1 != threadCount ? 
stride : clusterCount - start; - - std::memset(&table[start], 0, len * sizeof(Cluster)); - }); - } - - for (size_t i = 0; i < threadCount; ++i) - threads.wait_on_thread(i); -} - - -// Returns an approximation of the hashtable -// occupation during a search. The hash is x permill full, as per UCI protocol. -// Only counts entries which match the current generation. -int TranspositionTable::hashfull(int maxAge) const { - int maxAgeInternal = maxAge << GENERATION_BITS; - int cnt = 0; - for (int i = 0; i < 1000; ++i) - for (int j = 0; j < ClusterSize; ++j) - cnt += table[i].entry[j].is_occupied() - && table[i].entry[j].relative_age(generation8) <= maxAgeInternal; - - return cnt / ClusterSize; -} - - -void TranspositionTable::new_search() { - // increment by delta to keep lower bits as is - generation8 += GENERATION_DELTA; -} - - -uint8_t TranspositionTable::generation() const { return generation8; } - - -// Looks up the current position in the transposition -// table. It returns true if the position is found. -// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry -// to be replaced later. The replace value of an entry is calculated as its depth -// minus 8 times its relative age. TTEntry t1 is considered more valuable than -// TTEntry t2 if its replace value is greater than that of t2. -std::tuple TranspositionTable::probe(const Key key) const { - - TTEntry* const tte = first_entry(key); - const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster - - for (int i = 0; i < ClusterSize; ++i) - if (tte[i].key16 == key16) - // This gap is the main place for read races. - // After `read()` completes that copy is final, but may be self-inconsistent. 
- return {tte[i].is_occupied(), tte[i].read(), TTWriter(&tte[i])}; - - // Find an entry to be replaced according to the replacement strategy - TTEntry* replace = tte; - for (int i = 1; i < ClusterSize; ++i) - if (replace->depth8 - replace->relative_age(generation8) - > tte[i].depth8 - tte[i].relative_age(generation8)) - replace = &tte[i]; - - return {false, - TTData{Move::none(), VALUE_NONE, VALUE_NONE, DEPTH_ENTRY_OFFSET, BOUND_NONE, false}, - TTWriter(replace)}; -} - - -TTEntry* TranspositionTable::first_entry(const Key key) const { - return &table[mul_hi64(key, clusterCount)].entry[0]; -} - -} // namespace Stockfish diff --git a/src/tt.h b/src/tt.h deleted file mode 100644 index 38f6c8f4f6263db985ac2cc3b6d68c7b79674cb0..0000000000000000000000000000000000000000 --- a/src/tt.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef TT_H_INCLUDED -#define TT_H_INCLUDED - -#include -#include -#include - -#include "memory.h" -#include "types.h" - -namespace Stockfish { - -class ThreadPool; -struct TTEntry; -struct Cluster; - -// There is only one global hash table for the engine and all its threads. 
For chess in particular, we even allow racy -// updates between threads to and from the TT, as taking the time to synchronize access would cost thinking time and -// thus elo. As a hash table, collisions are possible and may cause chess playing issues (bizarre blunders, faulty mate -// reports, etc). Fixing these also loses elo; however such risk decreases quickly with larger TT size. -// -// `probe` is the primary method: given a board position, we lookup its entry in the table, and return a tuple of: -// 1) whether the entry already has this position -// 2) a copy of the prior data (if any) (may be inconsistent due to read races) -// 3) a writer object to this entry -// The copied data and the writer are separated to maintain clear boundaries between local vs global objects. - - -// A copy of the data already in the entry (possibly collided). `probe` may be racy, resulting in inconsistent data. -struct TTData { - Move move; - Value value, eval; - Depth depth; - Bound bound; - bool is_pv; - - TTData() = delete; - - // clang-format off - TTData(Move m, Value v, Value ev, Depth d, Bound b, bool pv) : - move(m), - value(v), - eval(ev), - depth(d), - bound(b), - is_pv(pv) {}; - // clang-format on -}; - - -// This is used to make racy writes to the global TT. 
-struct TTWriter { - public: - void write(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8); - - private: - friend class TranspositionTable; - TTEntry* entry; - TTWriter(TTEntry* tte); -}; - - -class TranspositionTable { - - public: - ~TranspositionTable() { aligned_large_pages_free(table); } - - void resize(size_t mbSize, ThreadPool& threads); // Set TT size - void clear(ThreadPool& threads); // Re-initialize memory, multithreaded - int hashfull(int maxAge = 0) - const; // Approximate what fraction of entries (permille) have been written to during this root search - - void - new_search(); // This must be called at the beginning of each root search to track entry aging - uint8_t generation() const; // The current age, used when writing new data to the TT - std::tuple - probe(const Key key) const; // The main method, whose retvals separate local vs global objects - TTEntry* first_entry(const Key key) - const; // This is the hash function; its only external use is memory prefetching. - - private: - friend struct TTEntry; - - size_t clusterCount; - Cluster* table = nullptr; - - uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8 -}; - -} // namespace Stockfish - -#endif // #ifndef TT_H_INCLUDED diff --git a/src/tune.cpp b/src/tune.cpp deleted file mode 100644 index f930c267e22ecb6b68712c1f89da3b03720bf77a..0000000000000000000000000000000000000000 --- a/src/tune.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "tune.h" - -#include -#include -#include -#include -#include -#include - -#include "ucioption.h" - -using std::string; - -namespace Stockfish { - -bool Tune::update_on_last; -const Option* LastOption = nullptr; -OptionsMap* Tune::options; -namespace { -std::map TuneResults; - -std::optional on_tune(const Option& o) { - - if (!Tune::update_on_last || LastOption == &o) - Tune::read_options(); - - return std::nullopt; -} -} - -void Tune::make_option(OptionsMap* opts, const string& n, int v, const SetRange& r) { - - // Do not generate option when there is nothing to tune (ie. min = max) - if (r(v).first == r(v).second) - return; - - if (TuneResults.count(n)) - v = TuneResults[n]; - - opts->add(n, Option(v, r(v).first, r(v).second, on_tune)); - LastOption = &((*opts)[n]); - - // Print formatted parameters, ready to be copy-pasted in Fishtest - std::cout << n << "," // - << v << "," // - << r(v).first << "," // - << r(v).second << "," // - << (r(v).second - r(v).first) / 20.0 << "," // - << "0.0020" << std::endl; -} - -string Tune::next(string& names, bool pop) { - - string name; - - do - { - string token = names.substr(0, names.find(',')); - - if (pop) - names.erase(0, token.size() + 1); - - std::stringstream ws(token); - name += (ws >> token, token); // Remove trailing whitespace - - } while (std::count(name.begin(), name.end(), '(') - std::count(name.begin(), name.end(), ')')); - - return name; -} - - -template<> -void Tune::Entry::init_option() { - make_option(options, name, value, range); -} - -template<> -void Tune::Entry::read_option() { - if (options->count(name)) - value = int((*options)[name]); -} - 
-// Instead of a variable here we have a PostUpdate function: just call it -template<> -void Tune::Entry::init_option() {} -template<> -void Tune::Entry::read_option() { - value(); -} - -} // namespace Stockfish - - -// Init options with tuning session results instead of default values. Useful to -// get correct bench signature after a tuning session or to test tuned values. -// Just copy fishtest tuning results in a result.txt file and extract the -// values with: -// -// cat results.txt | sed 's/^param: \([^,]*\), best: \([^,]*\).*/ TuneResults["\1"] = int(round(\2));/' -// -// Then paste the output below, as the function body - - -namespace Stockfish { - -void Tune::read_results() { /* ...insert your values here... */ } - -} // namespace Stockfish diff --git a/src/tune.h b/src/tune.h deleted file mode 100644 index 4ce6e759fde3b7920e3649dce438e185eb6bd42f..0000000000000000000000000000000000000000 --- a/src/tune.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef TUNE_H_INCLUDED -#define TUNE_H_INCLUDED - -#include -#include -#include -#include // IWYU pragma: keep -#include -#include - -namespace Stockfish { - -class OptionsMap; - -using Range = std::pair; // Option's min-max values -using RangeFun = Range(int); - -// Default Range function, to calculate Option's min-max values -inline Range default_range(int v) { return v > 0 ? Range(0, 2 * v) : Range(2 * v, 0); } - -struct SetRange { - explicit SetRange(RangeFun f) : - fun(f) {} - SetRange(int min, int max) : - fun(nullptr), - range(min, max) {} - Range operator()(int v) const { return fun ? fun(v) : range; } - - RangeFun* fun; - Range range; -}; - -#define SetDefaultRange SetRange(default_range) - - -// Tune class implements the 'magic' code that makes the setup of a fishtest tuning -// session as easy as it can be. Mainly you have just to remove const qualifiers -// from the variables you want to tune and flag them for tuning, so if you have: -// -// const Value myValue[][2] = { { V(100), V(20) }, { V(7), V(78) } }; -// -// If you have a my_post_update() function to run after values have been updated, -// and a my_range() function to set custom Option's min-max values, then you just -// remove the 'const' qualifiers and write somewhere below in the file: -// -// TUNE(SetRange(my_range), myValue, my_post_update); -// -// You can also set the range directly, and restore the default at the end -// -// TUNE(SetRange(-100, 100), myValue, SetDefaultRange); -// -// In case update function is slow and you have many parameters, you can add: -// -// UPDATE_ON_LAST(); -// -// And the values update, including post update function call, will be done only -// once, after the engine receives the last UCI option, that is the one defined -// and created as the last one, so the GUI should send the options in the same -// order in which have been defined. 
- -class Tune { - - using PostUpdate = void(); // Post-update function - - Tune() { read_results(); } - Tune(const Tune&) = delete; - void operator=(const Tune&) = delete; - void read_results(); - - static Tune& instance() { - static Tune t; - return t; - } // Singleton - - // Use polymorphism to accommodate Entry of different types in the same vector - struct EntryBase { - virtual ~EntryBase() = default; - virtual void init_option() = 0; - virtual void read_option() = 0; - }; - - template - struct Entry: public EntryBase { - - static_assert(!std::is_const_v, "Parameter cannot be const!"); - - static_assert(std::is_same_v || std::is_same_v, - "Parameter type not supported!"); - - Entry(const std::string& n, T& v, const SetRange& r) : - name(n), - value(v), - range(r) {} - void operator=(const Entry&) = delete; // Because 'value' is a reference - void init_option() override; - void read_option() override; - - std::string name; - T& value; - SetRange range; - }; - - // Our facility to fill the container, each Entry corresponds to a parameter - // to tune. We use variadic templates to deal with an unspecified number of - // entries, each one of a possible different type. - static std::string next(std::string& names, bool pop = true); - - int add(const SetRange&, std::string&&) { return 0; } - - template - int add(const SetRange& range, std::string&& names, T& value, Args&&... args) { - list.push_back(std::unique_ptr(new Entry(next(names), value, range))); - return add(range, std::move(names), args...); - } - - // Template specialization for arrays: recursively handle multi-dimensional arrays - template - int add(const SetRange& range, std::string&& names, T (&value)[N], Args&&... 
args) { - for (size_t i = 0; i < N; i++) - add(range, next(names, i == N - 1) + "[" + std::to_string(i) + "]", value[i]); - return add(range, std::move(names), args...); - } - - // Template specialization for SetRange - template - int add(const SetRange&, std::string&& names, SetRange& value, Args&&... args) { - return add(value, (next(names), std::move(names)), args...); - } - - static void make_option(OptionsMap* options, const std::string& n, int v, const SetRange& r); - - std::vector> list; - - public: - template - static int add(const std::string& names, Args&&... args) { - return instance().add(SetDefaultRange, names.substr(1, names.size() - 2), - args...); // Remove trailing parenthesis - } - static void init(OptionsMap& o) { - options = &o; - for (auto& e : instance().list) - e->init_option(); - read_options(); - } // Deferred, due to UCIEngine::Options access - static void read_options() { - for (auto& e : instance().list) - e->read_option(); - } - - static bool update_on_last; - static OptionsMap* options; -}; - -template -constexpr void tune_check_args(Args&&...) { - static_assert((!std::is_fundamental_v && ...), "TUNE macro arguments wrong"); -} - -// Some macro magic :-) we define a dummy int variable that the compiler initializes calling Tune::add() -#define STRINGIFY(x) #x -#define UNIQUE2(x, y) x##y -#define UNIQUE(x, y) UNIQUE2(x, y) // Two indirection levels to expand __LINE__ -#define TUNE(...) 
\ - int UNIQUE(p, __LINE__) = []() -> int { \ - tune_check_args(__VA_ARGS__); \ - return Tune::add(STRINGIFY((__VA_ARGS__)), __VA_ARGS__); \ - }(); - -#define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true - -} // namespace Stockfish - -#endif // #ifndef TUNE_H_INCLUDED diff --git a/src/types.h b/src/types.h deleted file mode 100644 index bfaa658e9c9b7d3b87e38ed5ab47c27ab07bd574..0000000000000000000000000000000000000000 --- a/src/types.h +++ /dev/null @@ -1,492 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef TYPES_H_INCLUDED - #define TYPES_H_INCLUDED - -// When compiling with provided Makefile (e.g. for Linux and OSX), configuration -// is done automatically. To get started type 'make help'. -// -// When Makefile is not used (e.g. with Microsoft Visual Studio) some switches -// need to be set manually: -// -// -DNDEBUG | Disable debugging mode. Always use this for release. -// -// -DNO_PREFETCH | Disable use of prefetch asm-instruction. You may need this to -// | run on some very old machines. -// -// -DUSE_POPCNT | Add runtime support for use of popcnt asm-instruction. Works -// | only in 64-bit mode and requires hardware with popcnt support. -// -// -DUSE_PEXT | Add runtime support for use of pext asm-instruction. 
Works -// | only in 64-bit mode and requires hardware with pext support. - - #include - #include - #include - #include - #include "misc.h" - - #if defined(_MSC_VER) - // Disable some silly and noisy warnings from MSVC compiler - #pragma warning(disable: 4127) // Conditional expression is constant - #pragma warning(disable: 4146) // Unary minus operator applied to unsigned type - #pragma warning(disable: 4800) // Forcing value to bool 'true' or 'false' - #endif - -// Predefined macros hell: -// -// __GNUC__ Compiler is GCC, Clang or ICX -// __clang__ Compiler is Clang or ICX -// __INTEL_LLVM_COMPILER Compiler is ICX -// _MSC_VER Compiler is MSVC -// _WIN32 Building on Windows (any) -// _WIN64 Building on Windows 64 bit - -// Enforce minimum GCC version - #if defined(__GNUC__) && !defined(__clang__) \ - && (__GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ < 3)) - #error "Stockfish requires GCC 9.3 or later for correct compilation" - #endif - - // Enforce minimum Clang version - #if defined(__clang__) && (__clang_major__ < 10) - #error "Stockfish requires Clang 10.0 or later for correct compilation" - #endif - - #define ASSERT_ALIGNED(ptr, alignment) assert(reinterpret_cast(ptr) % alignment == 0) - - #if defined(_WIN64) && defined(_MSC_VER) // No Makefile used - #include // Microsoft header for _BitScanForward64() - #define IS_64BIT - #endif - - #if defined(USE_POPCNT) && defined(_MSC_VER) - #include // Microsoft header for _mm_popcnt_u64() - #endif - - #if !defined(NO_PREFETCH) && defined(_MSC_VER) - #include // Microsoft header for _mm_prefetch() - #endif - - #if defined(USE_PEXT) - #include // Header for _pext_u64() intrinsic - #define pext(b, m) _pext_u64(b, m) - #else - #define pext(b, m) 0 - #endif - -namespace Stockfish { - - #ifdef USE_POPCNT -constexpr bool HasPopCnt = true; - #else -constexpr bool HasPopCnt = false; - #endif - - #ifdef USE_PEXT -constexpr bool HasPext = true; - #else -constexpr bool HasPext = false; - #endif - - #ifdef IS_64BIT -constexpr 
bool Is64Bit = true; - #else -constexpr bool Is64Bit = false; - #endif - -using Key = uint64_t; -using Bitboard = uint64_t; - -constexpr int MAX_MOVES = 256; -constexpr int MAX_PLY = 246; - -enum Color : uint8_t { - WHITE, - BLACK, - COLOR_NB = 2 -}; - -enum CastlingRights : uint8_t { - NO_CASTLING, - WHITE_OO, - WHITE_OOO = WHITE_OO << 1, - BLACK_OO = WHITE_OO << 2, - BLACK_OOO = WHITE_OO << 3, - - KING_SIDE = WHITE_OO | BLACK_OO, - QUEEN_SIDE = WHITE_OOO | BLACK_OOO, - WHITE_CASTLING = WHITE_OO | WHITE_OOO, - BLACK_CASTLING = BLACK_OO | BLACK_OOO, - ANY_CASTLING = WHITE_CASTLING | BLACK_CASTLING, - - CASTLING_RIGHT_NB = 16 -}; - -enum Bound : uint8_t { - BOUND_NONE, - BOUND_UPPER, - BOUND_LOWER, - BOUND_EXACT = BOUND_UPPER | BOUND_LOWER -}; - -// Value is used as an alias for int, this is done to differentiate between a search -// value and any other integer value. The values used in search are always supposed -// to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range. 
-using Value = int; - -constexpr Value VALUE_ZERO = 0; -constexpr Value VALUE_DRAW = 0; -constexpr Value VALUE_NONE = 32002; -constexpr Value VALUE_INFINITE = 32001; - -constexpr Value VALUE_MATE = 32000; -constexpr Value VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY; -constexpr Value VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY; - -constexpr Value VALUE_TB = VALUE_MATE_IN_MAX_PLY - 1; -constexpr Value VALUE_TB_WIN_IN_MAX_PLY = VALUE_TB - MAX_PLY; -constexpr Value VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY; - - -constexpr bool is_valid(Value value) { return value != VALUE_NONE; } - -constexpr bool is_win(Value value) { - assert(is_valid(value)); - return value >= VALUE_TB_WIN_IN_MAX_PLY; -} - -constexpr bool is_loss(Value value) { - assert(is_valid(value)); - return value <= VALUE_TB_LOSS_IN_MAX_PLY; -} - -constexpr bool is_decisive(Value value) { return is_win(value) || is_loss(value); } - -// In the code, we make the assumption that these values -// are such that non_pawn_material() can be used to uniquely -// identify the material on the board. 
-constexpr Value PawnValue = 208; -constexpr Value KnightValue = 781; -constexpr Value BishopValue = 825; -constexpr Value RookValue = 1276; -constexpr Value QueenValue = 2538; - - -// clang-format off -enum PieceType : std::uint8_t { - NO_PIECE_TYPE, PAWN, KNIGHT, BISHOP, ROOK, QUEEN, KING, - ALL_PIECES = 0, - PIECE_TYPE_NB = 8 -}; - -enum Piece : std::uint8_t { - NO_PIECE, - W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, - B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING, - PIECE_NB = 16 -}; -// clang-format on - -constexpr Value PieceValue[PIECE_NB] = { - VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO, - VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO}; - -using Depth = int; - -// The following DEPTH_ constants are used for transposition table entries -// and quiescence search move generation stages. In regular search, the -// depth stored in the transposition table is literal: the search depth -// (effort) used to make the corresponding transposition table value. In -// quiescence search, however, the transposition table entries only store -// the current quiescence move generation stage (which should thus compare -// lower than any regular search depth). -constexpr Depth DEPTH_QS = 0; -// For transposition table entries where no searching at all was done -// (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus -// compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET -// is used only for the transposition table entry occupancy check (see tt.cpp), -// and should thus be lower than DEPTH_UNSEARCHED. 
-constexpr Depth DEPTH_UNSEARCHED = -2; -constexpr Depth DEPTH_ENTRY_OFFSET = -3; - -// clang-format off -enum Square : uint8_t { - SQ_A1, SQ_B1, SQ_C1, SQ_D1, SQ_E1, SQ_F1, SQ_G1, SQ_H1, - SQ_A2, SQ_B2, SQ_C2, SQ_D2, SQ_E2, SQ_F2, SQ_G2, SQ_H2, - SQ_A3, SQ_B3, SQ_C3, SQ_D3, SQ_E3, SQ_F3, SQ_G3, SQ_H3, - SQ_A4, SQ_B4, SQ_C4, SQ_D4, SQ_E4, SQ_F4, SQ_G4, SQ_H4, - SQ_A5, SQ_B5, SQ_C5, SQ_D5, SQ_E5, SQ_F5, SQ_G5, SQ_H5, - SQ_A6, SQ_B6, SQ_C6, SQ_D6, SQ_E6, SQ_F6, SQ_G6, SQ_H6, - SQ_A7, SQ_B7, SQ_C7, SQ_D7, SQ_E7, SQ_F7, SQ_G7, SQ_H7, - SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8, - SQ_NONE, - - SQUARE_ZERO = 0, - SQUARE_NB = 64 -}; -// clang-format on - -enum Direction : int8_t { - NORTH = 8, - EAST = 1, - SOUTH = -NORTH, - WEST = -EAST, - - NORTH_EAST = NORTH + EAST, - SOUTH_EAST = SOUTH + EAST, - SOUTH_WEST = SOUTH + WEST, - NORTH_WEST = NORTH + WEST -}; - -enum File : uint8_t { - FILE_A, - FILE_B, - FILE_C, - FILE_D, - FILE_E, - FILE_F, - FILE_G, - FILE_H, - FILE_NB -}; - -enum Rank : uint8_t { - RANK_1, - RANK_2, - RANK_3, - RANK_4, - RANK_5, - RANK_6, - RANK_7, - RANK_8, - RANK_NB -}; - -// Keep track of what a move changes on the board (used by NNUE) -struct DirtyPiece { - Piece pc; // this is never allowed to be NO_PIECE - Square from, to; // to should be SQ_NONE for promotions - - // if {add,remove}_sq is SQ_NONE, {add,remove}_pc is allowed to be - // uninitialized - // castling uses add_sq and remove_sq to remove and add the rook - Square remove_sq, add_sq; - Piece remove_pc, add_pc; -}; - -// Keep track of what threats change on the board (used by NNUE) -struct DirtyThreat { - static constexpr int PcSqOffset = 0; - static constexpr int ThreatenedSqOffset = 8; - static constexpr int ThreatenedPcOffset = 16; - static constexpr int PcOffset = 20; - - DirtyThreat() { /* don't initialize data */ } - DirtyThreat(uint32_t raw) : - data(raw) {} - DirtyThreat(Piece pc, Piece threatened_pc, Square pc_sq, Square threatened_sq, bool add) { - data = 
(uint32_t(add) << 31) | (pc << PcOffset) | (threatened_pc << ThreatenedPcOffset) - | (threatened_sq << ThreatenedSqOffset) | (pc_sq << PcSqOffset); - } - - Piece pc() const { return static_cast(data >> PcOffset & 0xf); } - Piece threatened_pc() const { return static_cast(data >> ThreatenedPcOffset & 0xf); } - Square threatened_sq() const { return static_cast(data >> ThreatenedSqOffset & 0xff); } - Square pc_sq() const { return static_cast(data >> PcSqOffset & 0xff); } - bool add() const { return data >> 31; } - uint32_t raw() const { return data; } - - private: - uint32_t data; -}; - -// A piece can be involved in at most 8 outgoing attacks and 16 incoming attacks. -// Moving a piece also can reveal at most 8 discovered attacks. -// This implies that a non-castling move can change at most (8 + 16) * 3 + 8 = 80 features. -// By similar logic, a castling move can change at most (5 + 1 + 3 + 9) * 2 = 36 features. -// Thus, 80 should work as an upper bound. Finally, 16 entries are added to accommodate -// unmasked vector stores near the end of the list. 
- -using DirtyThreatList = ValueList; - -struct DirtyThreats { - DirtyThreatList list; - Color us; - Square prevKsq, ksq; - - Bitboard threatenedSqs, threateningSqs; -}; - - #define ENABLE_INCR_OPERATORS_ON(T) \ - constexpr T& operator++(T& d) { return d = T(int(d) + 1); } \ - constexpr T& operator--(T& d) { return d = T(int(d) - 1); } - -ENABLE_INCR_OPERATORS_ON(PieceType) -ENABLE_INCR_OPERATORS_ON(Square) -ENABLE_INCR_OPERATORS_ON(File) -ENABLE_INCR_OPERATORS_ON(Rank) - - #undef ENABLE_INCR_OPERATORS_ON - -constexpr Direction operator+(Direction d1, Direction d2) { return Direction(int(d1) + int(d2)); } -constexpr Direction operator*(int i, Direction d) { return Direction(i * int(d)); } - -// Additional operators to add a Direction to a Square -constexpr Square operator+(Square s, Direction d) { return Square(int(s) + int(d)); } -constexpr Square operator-(Square s, Direction d) { return Square(int(s) - int(d)); } -constexpr Square& operator+=(Square& s, Direction d) { return s = s + d; } -constexpr Square& operator-=(Square& s, Direction d) { return s = s - d; } - -// Toggle color -constexpr Color operator~(Color c) { return Color(c ^ BLACK); } - -// Swap A1 <-> A8 -constexpr Square flip_rank(Square s) { return Square(s ^ SQ_A8); } - -// Swap A1 <-> H1 -constexpr Square flip_file(Square s) { return Square(s ^ SQ_H1); } - -// Swap color of piece B_KNIGHT <-> W_KNIGHT -constexpr Piece operator~(Piece pc) { return Piece(pc ^ 8); } - -constexpr CastlingRights operator&(Color c, CastlingRights cr) { - return CastlingRights((c == WHITE ? 
WHITE_CASTLING : BLACK_CASTLING) & cr); -} - -constexpr Value mate_in(int ply) { return VALUE_MATE - ply; } - -constexpr Value mated_in(int ply) { return -VALUE_MATE + ply; } - -constexpr Square make_square(File f, Rank r) { return Square((r << 3) + f); } - -constexpr Piece make_piece(Color c, PieceType pt) { return Piece((c << 3) + pt); } - -constexpr PieceType type_of(Piece pc) { return PieceType(pc & 7); } - -constexpr Color color_of(Piece pc) { - assert(pc != NO_PIECE); - return Color(pc >> 3); -} - -constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } - -constexpr File file_of(Square s) { return File(s & 7); } - -constexpr Rank rank_of(Square s) { return Rank(s >> 3); } - -constexpr Square relative_square(Color c, Square s) { return Square(s ^ (c * 56)); } - -constexpr Rank relative_rank(Color c, Rank r) { return Rank(r ^ (c * 7)); } - -constexpr Rank relative_rank(Color c, Square s) { return relative_rank(c, rank_of(s)); } - -constexpr Direction pawn_push(Color c) { return c == WHITE ? NORTH : SOUTH; } - - -// Based on a congruential pseudo-random number generator -constexpr Key make_key(uint64_t seed) { - return seed * 6364136223846793005ULL + 1442695040888963407ULL; -} - - -enum MoveType : uint16_t { - NORMAL, - PROMOTION = 1 << 14, - EN_PASSANT = 2 << 14, - CASTLING = 3 << 14 -}; - -// A move needs 16 bits to be stored -// -// bit 0- 5: destination square (from 0 to 63) -// bit 6-11: origin square (from 0 to 63) -// bit 12-13: promotion piece type - 2 (from KNIGHT-2 to QUEEN-2) -// bit 14-15: special move flag: promotion (1), en passant (2), castling (3) -// NOTE: en passant bit is set only when a pawn can be captured -// -// Special cases are Move::none() and Move::null(). We can sneak these in because -// in any normal move the destination square and origin square are always different, -// but Move::none() and Move::null() have the same origin and destination square. 
- -class Move { - public: - Move() = default; - constexpr explicit Move(std::uint16_t d) : - data(d) {} - - constexpr Move(Square from, Square to) : - data((from << 6) + to) {} - - template - static constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { - return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); - } - - constexpr Square from_sq() const { - assert(is_ok()); - return Square((data >> 6) & 0x3F); - } - - constexpr Square to_sq() const { - assert(is_ok()); - return Square(data & 0x3F); - } - - // Same as to_sq() but without assertion, for branchless code paths - // where the result is masked/ignored when move is not ok - constexpr Square to_sq_unchecked() const { return Square(data & 0x3F); } - - constexpr MoveType type_of() const { return MoveType(data & (3 << 14)); } - - constexpr PieceType promotion_type() const { return PieceType(((data >> 12) & 3) + KNIGHT); } - - constexpr bool is_ok() const { return none().data != data && null().data != data; } - - static constexpr Move null() { return Move(65); } - static constexpr Move none() { return Move(0); } - - constexpr bool operator==(const Move& m) const { return data == m.data; } - constexpr bool operator!=(const Move& m) const { return data != m.data; } - - constexpr explicit operator bool() const { return data != 0; } - - constexpr std::uint16_t raw() const { return data; } - - struct MoveHash { - std::size_t operator()(const Move& m) const { return make_key(m.data); } - }; - - protected: - std::uint16_t data; -}; - -template -struct is_all_same { - static constexpr bool value = (std::is_same_v && ...); -}; - -template -constexpr auto is_all_same_v = is_all_same::value; - -} // namespace Stockfish - -#endif // #ifndef TYPES_H_INCLUDED - -#include "tune.h" // Global visibility to tuning setup diff --git a/src/uci.cpp b/src/uci.cpp deleted file mode 100644 index 385dfeb4ee04dbbe7453f4c7d3bdd7ecba16238e..0000000000000000000000000000000000000000 --- a/src/uci.cpp +++ /dev/null @@ -1,658 
+0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "uci.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "benchmark.h" -#include "engine.h" -#include "memory.h" -#include "movegen.h" -#include "position.h" -#include "score.h" -#include "search.h" -#include "types.h" -#include "ucioption.h" - -namespace Stockfish { - -constexpr auto BenchmarkCommand = "speedtest"; - -constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; -template -struct overload: Ts... { - using Ts::operator()...; -}; - -template -overload(Ts...) 
-> overload; - -void UCIEngine::print_info_string(std::string_view str) { - sync_cout_start(); - for (auto& line : split(str, "\n")) - { - if (!is_whitespace(line)) - { - std::cout << "info string " << line << '\n'; - } - } - sync_cout_end(); -} - -UCIEngine::UCIEngine(int argc, char** argv) : - engine(argv[0]), - cli(argc, argv) { - - engine.get_options().add_info_listener([](const std::optional& str) { - if (str.has_value()) - print_info_string(*str); - }); - - init_search_update_listeners(); -} - -void UCIEngine::init_search_update_listeners() { - engine.set_on_iter([](const auto& i) { on_iter(i); }); - engine.set_on_update_no_moves([](const auto& i) { on_update_no_moves(i); }); - engine.set_on_update_full( - [this](const auto& i) { on_update_full(i, engine.get_options()["UCI_ShowWDL"]); }); - engine.set_on_bestmove([](const auto& bm, const auto& p) { on_bestmove(bm, p); }); - engine.set_on_verify_networks([](const auto& s) { print_info_string(s); }); -} - -void UCIEngine::loop() { - std::string token, cmd; - - for (int i = 1; i < cli.argc; ++i) - cmd += std::string(cli.argv[i]) + " "; - - do - { - if (cli.argc == 1 - && !getline(std::cin, cmd)) // Wait for an input or an end-of-file (EOF) indication - cmd = "quit"; - - std::istringstream is(cmd); - - token.clear(); // Avoid a stale if getline() returns nothing or a blank line - is >> std::skipws >> token; - - if (token == "quit" || token == "stop") - engine.stop(); - - // The GUI sends 'ponderhit' to tell that the user has played the expected move. - // So, 'ponderhit' is sent if pondering was done on the same move that the user - // has played. The search should continue, but should also switch from pondering - // to the normal search. 
- else if (token == "ponderhit") - engine.set_ponderhit(false); - - else if (token == "uci") - { - sync_cout << "id name " << engine_info(true) << "\n" - << engine.get_options() << sync_endl; - - sync_cout << "uciok" << sync_endl; - } - - else if (token == "setoption") - setoption(is); - else if (token == "go") - { - // send info strings after the go command is sent for old GUIs and python-chess - print_info_string(engine.numa_config_information_as_string()); - print_info_string(engine.thread_allocation_information_as_string()); - go(is); - } - else if (token == "position") - position(is); - else if (token == "ucinewgame") - engine.search_clear(); - else if (token == "isready") - sync_cout << "readyok" << sync_endl; - - // Add custom non-UCI commands, mainly for debugging purposes. - // These commands must not be used during a search! - else if (token == "flip") - engine.flip(); - else if (token == "bench") - bench(is); - else if (token == BenchmarkCommand) - benchmark(is); - else if (token == "d") - sync_cout << engine.visualize() << sync_endl; - else if (token == "eval") - engine.trace_eval(); - else if (token == "compiler") - sync_cout << compiler_info() << sync_endl; - else if (token == "export_net") - { - std::pair, std::string> files[2]; - - if (is >> std::skipws >> files[0].second) - files[0].first = files[0].second; - - if (is >> std::skipws >> files[1].second) - files[1].first = files[1].second; - - engine.save_network(files); - } - else if (token == "--help" || token == "help" || token == "--license" || token == "license") - sync_cout - << "\nStockfish is a powerful chess engine for playing and analyzing." - "\nIt is released as free software licensed under the GNU GPLv3 License." - "\nStockfish is normally used with a graphical user interface (GUI) and implements" - "\nthe Universal Chess Interface (UCI) protocol to communicate with a GUI, an API, etc." 
- "\nFor any further information, visit https://github.com/official-stockfish/Stockfish#readme" - "\nor read the corresponding README.md and Copying.txt files distributed along with this program.\n" - << sync_endl; - else if (!token.empty() && token[0] != '#') - sync_cout << "Unknown command: '" << cmd << "'. Type help for more information." - << sync_endl; - - } while (token != "quit" && cli.argc == 1); // The command-line arguments are one-shot -} - -Search::LimitsType UCIEngine::parse_limits(std::istream& is) { - Search::LimitsType limits; - std::string token; - - limits.startTime = now(); // The search starts as early as possible - - while (is >> token) - if (token == "searchmoves") // Needs to be the last command on the line - while (is >> token) - limits.searchmoves.push_back(to_lower(token)); - - else if (token == "wtime") - is >> limits.time[WHITE]; - else if (token == "btime") - is >> limits.time[BLACK]; - else if (token == "winc") - is >> limits.inc[WHITE]; - else if (token == "binc") - is >> limits.inc[BLACK]; - else if (token == "movestogo") - is >> limits.movestogo; - else if (token == "depth") - is >> limits.depth; - else if (token == "nodes") - is >> limits.nodes; - else if (token == "movetime") - is >> limits.movetime; - else if (token == "mate") - is >> limits.mate; - else if (token == "perft") - is >> limits.perft; - else if (token == "infinite") - limits.infinite = 1; - else if (token == "ponder") - limits.ponderMode = true; - - return limits; -} - -void UCIEngine::go(std::istringstream& is) { - - Search::LimitsType limits = parse_limits(is); - - if (limits.perft) - perft(limits); - else - engine.go(limits); -} - -void UCIEngine::bench(std::istream& args) { - std::string token; - uint64_t num, nodes = 0, cnt = 1; - uint64_t nodesSearched = 0; - const auto& options = engine.get_options(); - - engine.set_on_update_full([&](const auto& i) { - nodesSearched = i.nodes; - on_update_full(i, options["UCI_ShowWDL"]); - }); - - std::vector list = 
Benchmark::setup_bench(engine.fen(), args); - - num = count_if(list.begin(), list.end(), - [](const std::string& s) { return s.find("go ") == 0 || s.find("eval") == 0; }); - - TimePoint elapsed = now(); - - for (const auto& cmd : list) - { - std::istringstream is(cmd); - is >> std::skipws >> token; - - if (token == "go" || token == "eval") - { - std::cerr << "\nPosition: " << cnt++ << '/' << num << " (" << engine.fen() << ")" - << std::endl; - if (token == "go") - { - Search::LimitsType limits = parse_limits(is); - - if (limits.perft) - nodesSearched = perft(limits); - else - { - engine.go(limits); - engine.wait_for_search_finished(); - } - - nodes += nodesSearched; - nodesSearched = 0; - } - else - engine.trace_eval(); - } - else if (token == "setoption") - setoption(is); - else if (token == "position") - position(is); - else if (token == "ucinewgame") - { - engine.search_clear(); // search_clear may take a while - elapsed = now(); - } - } - - elapsed = now() - elapsed + 1; // Ensure positivity to avoid a 'divide by zero' - - dbg_print(); - - std::cerr << "\n===========================" // - << "\nTotal time (ms) : " << elapsed // - << "\nNodes searched : " << nodes // - << "\nNodes/second : " << 1000 * nodes / elapsed << std::endl; - - // reset callback, to not capture a dangling reference to nodesSearched - engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); -} - -void UCIEngine::benchmark(std::istream& args) { - // Probably not very important for a test this long, but include for completeness and sanity. 
- static constexpr int NUM_WARMUP_POSITIONS = 3; - - std::string token; - uint64_t nodes = 0, cnt = 1; - uint64_t nodesSearched = 0; - - engine.set_on_update_full([&](const Engine::InfoFull& i) { nodesSearched = i.nodes; }); - - engine.set_on_iter([](const auto&) {}); - engine.set_on_update_no_moves([](const auto&) {}); - engine.set_on_bestmove([](const auto&, const auto&) {}); - engine.set_on_verify_networks([](const auto&) {}); - - Benchmark::BenchmarkSetup setup = Benchmark::setup_benchmark(args); - - const auto numGoCommands = count_if(setup.commands.begin(), setup.commands.end(), - [](const std::string& s) { return s.find("go ") == 0; }); - - TimePoint totalTime = 0; - - // Set options once at the start. - auto ss = std::istringstream("name Threads value " + std::to_string(setup.threads)); - setoption(ss); - ss = std::istringstream("name Hash value " + std::to_string(setup.ttSize)); - setoption(ss); - ss = std::istringstream("name UCI_Chess960 value false"); - setoption(ss); - - // Warmup - for (const auto& cmd : setup.commands) - { - std::istringstream is(cmd); - is >> std::skipws >> token; - - if (token == "go") - { - // One new line is produced by the search, so omit it here - std::cerr << "\rWarmup position " << cnt++ << '/' << NUM_WARMUP_POSITIONS; - - Search::LimitsType limits = parse_limits(is); - - // Run with silenced network verification - engine.go(limits); - engine.wait_for_search_finished(); - } - else if (token == "position") - position(is); - else if (token == "ucinewgame") - { - engine.search_clear(); // search_clear may take a while - } - - if (cnt > NUM_WARMUP_POSITIONS) - break; - } - - std::cerr << "\n"; - - cnt = 1; - nodes = 0; - - int numHashfullReadings = 0; - constexpr int hashfullAges[] = {0, 999}; // Only normal hashfull and touched hash. 
- constexpr int hashfullAgeCount = std::size(hashfullAges); - int totalHashfull[hashfullAgeCount] = {0}; - int maxHashfull[hashfullAgeCount] = {0}; - - auto updateHashfullReadings = [&]() { - numHashfullReadings += 1; - - for (int i = 0; i < hashfullAgeCount; ++i) - { - const int hashfull = engine.get_hashfull(hashfullAges[i]); - maxHashfull[i] = std::max(maxHashfull[i], hashfull); - totalHashfull[i] += hashfull; - } - }; - - engine.search_clear(); // search_clear may take a while - - for (const auto& cmd : setup.commands) - { - std::istringstream is(cmd); - is >> std::skipws >> token; - - if (token == "go") - { - // One new line is produced by the search, so omit it here - std::cerr << "\rPosition " << cnt++ << '/' << numGoCommands; - - Search::LimitsType limits = parse_limits(is); - - nodesSearched = 0; - TimePoint elapsed = now(); - - // Run with silenced network verification - engine.go(limits); - engine.wait_for_search_finished(); - - totalTime += now() - elapsed; - - updateHashfullReadings(); - - nodes += nodesSearched; - } - else if (token == "position") - position(is); - else if (token == "ucinewgame") - { - engine.search_clear(); // search_clear may take a while - } - } - - totalTime = std::max(totalTime, 1); // Ensure positivity to avoid a 'divide by zero' - - dbg_print(); - - std::cerr << "\n"; - - static_assert( - std::size(hashfullAges) == 2 && hashfullAges[0] == 0 && hashfullAges[1] == 999, - "Hardcoded for display. Would complicate the code needlessly in the current state."); - - std::string threadBinding = engine.thread_binding_information_as_string(); - if (threadBinding.empty()) - threadBinding = "none"; - - // clang-format off - - std::cerr << "===========================" - << "\nVersion : " - << engine_version_info() - // "\nCompiled by : " - << compiler_info() - << "Large pages : " << (has_large_pages() ? 
"yes" : "no") - << "\nUser invocation : " << BenchmarkCommand << " " - << setup.originalInvocation << "\nFilled invocation : " << BenchmarkCommand - << " " << setup.filledInvocation - << "\nAvailable processors : " << engine.get_numa_config_as_string() - << "\nThread count : " << setup.threads - << "\nThread binding : " << threadBinding - << "\nTT size [MiB] : " << setup.ttSize - << "\nHash max, avg [per mille] : " - << "\n single search : " << maxHashfull[0] << ", " - << totalHashfull[0] / numHashfullReadings - << "\n single game : " << maxHashfull[1] << ", " - << totalHashfull[1] / numHashfullReadings - << "\nTotal nodes searched : " << nodes - << "\nTotal search time [s] : " << totalTime / 1000.0 - << "\nNodes/second : " << 1000 * nodes / totalTime << std::endl; - - // clang-format on - - init_search_update_listeners(); -} - -void UCIEngine::setoption(std::istringstream& is) { - engine.wait_for_search_finished(); - engine.get_options().setoption(is); -} - -std::uint64_t UCIEngine::perft(const Search::LimitsType& limits) { - auto nodes = engine.perft(engine.fen(), limits.perft, engine.get_options()["UCI_Chess960"]); - sync_cout << "\nNodes searched: " << nodes << "\n" << sync_endl; - return nodes; -} - -void UCIEngine::position(std::istringstream& is) { - std::string token, fen; - - is >> token; - - if (token == "startpos") - { - fen = StartFEN; - is >> token; // Consume the "moves" token, if any - } - else if (token == "fen") - while (is >> token && token != "moves") - fen += token + " "; - else - return; - - std::vector moves; - - while (is >> token) - { - moves.push_back(token); - } - - engine.set_position(fen, moves); -} - -namespace { - -struct WinRateParams { - double a; - double b; -}; - -WinRateParams win_rate_params(const Position& pos) { - - int material = pos.count() + 3 * pos.count() + 3 * pos.count() - + 5 * pos.count() + 9 * pos.count(); - - // The fitted model only uses data for material counts in [17, 78], and is anchored at count 58. 
- double m = std::clamp(material, 17, 78) / 58.0; - - // Return a = p_a(material) and b = p_b(material), see github.com/official-stockfish/WDL_model - constexpr double as[] = {-72.32565836, 185.93832038, -144.58862193, 416.44950446}; - constexpr double bs[] = {83.86794042, -136.06112997, 69.98820887, 47.62901433}; - - double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; - double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; - - return {a, b}; -} - -// The win rate model is 1 / (1 + exp((a - eval) / b)), where a = p_a(material) and b = p_b(material). -// It fits the LTC fishtest statistics rather accurately. -int win_rate_model(Value v, const Position& pos) { - - auto [a, b] = win_rate_params(pos); - - // Return the win rate in per mille units, rounded to the nearest integer. - return int(0.5 + 1000 / (1 + std::exp((a - double(v)) / b))); -} -} - -std::string UCIEngine::format_score(const Score& s) { - constexpr int TB_CP = 20000; - if (s.is()) { - return std::string("cp ") + std::to_string(s.get().value); - } else if (s.is()) { - auto mate = s.get(); - auto m = (mate.plies > 0 ? (mate.plies + 1) : mate.plies) / 2; - return std::string("mate ") + std::to_string(m); - } else if (s.is()) { - auto tb = s.get(); - return std::string("cp ") + std::to_string((tb.win ? TB_CP - tb.plies : -TB_CP - tb.plies)); - } - return "cp 0"; -} - -// Turns a Value to an integer centipawn number, -// without treatment of mate and similar special scores. -int UCIEngine::to_cp(Value v, const Position& pos) { - - // In general, the score can be defined via the WDL as - // (log(1/L - 1) - log(1/W - 1)) / (log(1/L - 1) + log(1/W - 1)). - // Based on our win_rate_model, this simply yields v / a. 
- - auto [a, b] = win_rate_params(pos); - - return int(std::round(100 * int(v) / a)); -} - -std::string UCIEngine::wdl(Value v, const Position& pos) { - std::stringstream ss; - - int wdl_w = win_rate_model(v, pos); - int wdl_l = win_rate_model(-v, pos); - int wdl_d = 1000 - wdl_w - wdl_l; - ss << wdl_w << " " << wdl_d << " " << wdl_l; - - return ss.str(); -} - -std::string UCIEngine::square(Square s) { - return std::string{char('a' + file_of(s)), char('1' + rank_of(s))}; -} - -std::string UCIEngine::move(Move m, bool chess960) { - if (m == Move::none()) - return "(none)"; - - if (m == Move::null()) - return "0000"; - - Square from = m.from_sq(); - Square to = m.to_sq(); - - if (m.type_of() == CASTLING && !chess960) - to = make_square(to > from ? FILE_G : FILE_C, rank_of(from)); - - std::string move = square(from) + square(to); - - if (m.type_of() == PROMOTION) - move += " pnbrqk"[m.promotion_type()]; - - return move; -} - - -std::string UCIEngine::to_lower(std::string str) { - std::transform(str.begin(), str.end(), str.begin(), [](auto c) { return std::tolower(c); }); - - return str; -} - -Move UCIEngine::to_move(const Position& pos, std::string str) { - str = to_lower(str); - - for (const auto& m : MoveList(pos)) - if (str == move(m, pos.is_chess960())) - return m; - - return Move::none(); -} - -void UCIEngine::on_update_no_moves(const Engine::InfoShort& info) { - sync_cout << "info depth " << info.depth << " score " << format_score(info.score) << sync_endl; -} - -void UCIEngine::on_update_full(const Engine::InfoFull& info, bool showWDL) { - std::stringstream ss; - - ss << "info"; - ss << " depth " << info.depth // - << " seldepth " << info.selDepth // - << " multipv " << info.multiPV // - << " score " << format_score(info.score); // - - if (!info.bound.empty()) - ss << " " << info.bound; - - if (showWDL) - ss << " wdl " << info.wdl; - - ss << " nodes " << info.nodes // - << " nps " << info.nps // - << " hashfull " << info.hashfull // - << " tbhits " << 
info.tbHits // - << " time " << info.timeMs // - << " pv " << info.pv; // - - sync_cout << ss.str() << sync_endl; -} - -void UCIEngine::on_iter(const Engine::InfoIter& info) { - std::stringstream ss; - - ss << "info"; - ss << " depth " << info.depth // - << " currmove " << info.currmove // - << " currmovenumber " << info.currmovenumber; // - - sync_cout << ss.str() << sync_endl; -} - -void UCIEngine::on_bestmove(std::string_view bestmove, std::string_view ponder) { - sync_cout << "bestmove " << bestmove; - if (!ponder.empty()) - std::cout << " ponder " << ponder; - std::cout << sync_endl; -} - -} // namespace Stockfish diff --git a/src/uci.h b/src/uci.h deleted file mode 100644 index ebc04fc3c70bb2230c38facc5b55047a46ffae65..0000000000000000000000000000000000000000 --- a/src/uci.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef UCI_H_INCLUDED -#define UCI_H_INCLUDED - -#include -#include -#include -#include - -#include "engine.h" -#include "misc.h" -#include "search.h" - -namespace Stockfish { - -class Position; -class Move; -class Score; -enum Square : uint8_t; -using Value = int; - -class UCIEngine { - public: - UCIEngine(int argc, char** argv); - - void loop(); - - static int to_cp(Value v, const Position& pos); - static std::string format_score(const Score& s); - static std::string square(Square s); - static std::string move(Move m, bool chess960); - static std::string wdl(Value v, const Position& pos); - static std::string to_lower(std::string str); - static Move to_move(const Position& pos, std::string str); - - static Search::LimitsType parse_limits(std::istream& is); - - auto& engine_options() { return engine.get_options(); } - - private: - Engine engine; - CommandLine cli; - - static void print_info_string(std::string_view str); - - void go(std::istringstream& is); - void bench(std::istream& args); - void benchmark(std::istream& args); - void position(std::istringstream& is); - void setoption(std::istringstream& is); - std::uint64_t perft(const Search::LimitsType&); - - static void on_update_no_moves(const Engine::InfoShort& info); - static void on_update_full(const Engine::InfoFull& info, bool showWDL); - static void on_iter(const Engine::InfoIter& info); - static void on_bestmove(std::string_view bestmove, std::string_view ponder); - - void init_search_update_listeners(); -}; - -} // namespace Stockfish - -#endif // #ifndef UCI_H_INCLUDED diff --git a/src/ucioption.cpp b/src/ucioption.cpp deleted file mode 100644 index 8db7967497e5277efc43bd693732e33ef8c344a6..0000000000000000000000000000000000000000 --- a/src/ucioption.cpp +++ /dev/null @@ -1,213 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - 
it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#include "ucioption.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "misc.h" - -namespace Stockfish { - -bool CaseInsensitiveLess::operator()(const std::string& s1, const std::string& s2) const { - - return std::lexicographical_compare( - s1.begin(), s1.end(), s2.begin(), s2.end(), - [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); }); -} - -void OptionsMap::add_info_listener(InfoListener&& message_func) { info = std::move(message_func); } - -void OptionsMap::setoption(std::istringstream& is) { - std::string token, name, value; - - is >> token; // Consume the "name" token - - // Read the option name (can contain spaces) - while (is >> token && token != "value") - name += (name.empty() ? "" : " ") + token; - - // Read the option value (can contain spaces) - while (is >> token) - value += (value.empty() ? 
"" : " ") + token; - - if (options_map.count(name)) - options_map[name] = value; - else - sync_cout << "No such option: " << name << sync_endl; -} - -const Option& OptionsMap::operator[](const std::string& name) const { - auto it = options_map.find(name); - assert(it != options_map.end()); - return it->second; -} - -// Inits options and assigns idx in the correct printing order -void OptionsMap::add(const std::string& name, const Option& option) { - if (!options_map.count(name)) - { - static size_t insert_order = 0; - - options_map[name] = option; - - options_map[name].parent = this; - options_map[name].idx = insert_order++; - } - else - { - std::cerr << "Option \"" << name << "\" was already added!" << std::endl; - std::exit(EXIT_FAILURE); - } -} - - -std::size_t OptionsMap::count(const std::string& name) const { return options_map.count(name); } - -Option::Option(const OptionsMap* map) : - parent(map) {} - -Option::Option(const char* v, OnChange f) : - type("string"), - min(0), - max(0), - on_change(std::move(f)) { - defaultValue = currentValue = v; -} - -Option::Option(bool v, OnChange f) : - type("check"), - min(0), - max(0), - on_change(std::move(f)) { - defaultValue = currentValue = (v ? "true" : "false"); -} - -Option::Option(OnChange f) : - type("button"), - min(0), - max(0), - on_change(std::move(f)) {} - -Option::Option(int v, int minv, int maxv, OnChange f) : - type("spin"), - min(minv), - max(maxv), - on_change(std::move(f)) { - defaultValue = currentValue = std::to_string(v); -} - -Option::Option(const char* v, const char* cur, OnChange f) : - type("combo"), - min(0), - max(0), - on_change(std::move(f)) { - defaultValue = v; - currentValue = cur; -} - -Option::operator int() const { - assert(type == "check" || type == "spin"); - return (type == "spin" ? 
std::stoi(currentValue) : currentValue == "true"); -} - -Option::operator std::string() const { - assert(type == "string"); - return currentValue; -} - -bool Option::operator==(const char* s) const { - assert(type == "combo"); - return !CaseInsensitiveLess()(currentValue, s) && !CaseInsensitiveLess()(s, currentValue); -} - -bool Option::operator!=(const char* s) const { return !(*this == s); } - - -// Updates currentValue and triggers on_change() action. It's up to -// the GUI to check for option's limits, but we could receive the new value -// from the user by console window, so let's check the bounds anyway. -Option& Option::operator=(const std::string& v) { - - assert(!type.empty()); - - if ((type != "button" && type != "string" && v.empty()) - || (type == "check" && v != "true" && v != "false") - || (type == "spin" && (std::stoi(v) < min || std::stoi(v) > max))) - return *this; - - if (type == "combo") - { - OptionsMap comboMap; // To have case insensitive compare - std::string token; - std::istringstream ss(defaultValue); - while (ss >> token) - comboMap.add(token, Option()); - if (!comboMap.count(v) || v == "var") - return *this; - } - - if (type == "string") - currentValue = v == "" ? "" : v; - else if (type != "button") - currentValue = v; - - if (on_change) - { - const auto ret = on_change(*this); - - if (ret && parent != nullptr && parent->info != nullptr) - parent->info(ret); - } - - return *this; -} - -std::ostream& operator<<(std::ostream& os, const OptionsMap& om) { - for (size_t idx = 0; idx < om.options_map.size(); ++idx) - for (const auto& it : om.options_map) - if (it.second.idx == idx) - { - const Option& o = it.second; - os << "\noption name " << it.first << " type " << o.type; - - if (o.type == "check" || o.type == "combo") - os << " default " << o.defaultValue; - - else if (o.type == "string") - { - std::string defaultValue = o.defaultValue.empty() ? 
"" : o.defaultValue; - os << " default " << defaultValue; - } - - else if (o.type == "spin") - os << " default " << stoi(o.defaultValue) << " min " << o.min << " max " - << o.max; - - break; - } - - return os; -} -} diff --git a/src/ucioption.h b/src/ucioption.h deleted file mode 100644 index 4f6d7541cff0a1e3603f62291c61ad96876466b9..0000000000000000000000000000000000000000 --- a/src/ucioption.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#ifndef UCIOPTION_H_INCLUDED -#define UCIOPTION_H_INCLUDED - -#include -#include -#include -#include -#include -#include - -namespace Stockfish { -// Define a custom comparator, because the UCI options should be case-insensitive -struct CaseInsensitiveLess { - bool operator()(const std::string&, const std::string&) const; -}; - -class OptionsMap; - -// The Option class implements each option as specified by the UCI protocol -class Option { - public: - using OnChange = std::function(const Option&)>; - - Option(const OptionsMap*); - Option(OnChange = nullptr); - Option(bool v, OnChange = nullptr); - Option(const char* v, OnChange = nullptr); - Option(int v, int minv, int maxv, OnChange = nullptr); - Option(const char* v, const char* cur, OnChange = nullptr); - - Option& operator=(const std::string&); - operator int() const; - operator std::string() const; - bool operator==(const char*) const; - bool operator!=(const char*) const; - - friend std::ostream& operator<<(std::ostream&, const OptionsMap&); - - int operator<<(const Option&) = delete; - - private: - friend class OptionsMap; - friend class Engine; - friend class Tune; - - - std::string defaultValue, currentValue, type; - int min, max; - size_t idx; - OnChange on_change; - const OptionsMap* parent = nullptr; -}; - -class OptionsMap { - public: - using InfoListener = std::function)>; - - OptionsMap() = default; - OptionsMap(const OptionsMap&) = delete; - OptionsMap(OptionsMap&&) = delete; - OptionsMap& operator=(const OptionsMap&) = delete; - OptionsMap& operator=(OptionsMap&&) = delete; - - void add_info_listener(InfoListener&&); - - void setoption(std::istringstream&); - - const Option& operator[](const std::string&) const; - - void add(const std::string&, const Option& option); - - std::size_t count(const std::string&) const; - - private: - friend class Engine; - friend class Option; - - friend std::ostream& operator<<(std::ostream&, const OptionsMap&); - - // The options container is defined as a 
std::map - using OptionsStore = std::map; - - OptionsStore options_map; - InfoListener info; -}; - -} -#endif // #ifndef UCIOPTION_H_INCLUDED