thrust / cmake /ThrustCudaConfig.cmake
camenduru's picture
thanks to nvidia ❤
8ae5fc5
# The CUDA language must be enabled before CMAKE_CUDA_COMPILER_ID and
# CMAKE_CUDA_COMPILER_VERSION can be inspected below.
enable_language(CUDA)

# Architectures offered for every CUDA compiler; version-dependent entries are
# appended below.
set(THRUST_KNOWN_COMPUTE_ARCHS 50 52 53 60 61 62 70 72 75 80 86)

if ("NVIDIA" STREQUAL "${CMAKE_CUDA_COMPILER_ID}")
  # sm_90 is only accepted by NVCC starting with the 11.8 toolkit. NVCC reports
  # versions with a patch component (e.g. 11.7.99), and any such version
  # compares greater than "11.7", so the bound must be written as ">= 11.8" to
  # keep 11.7.x toolkits from being offered an architecture they reject.
  if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.8)
    list(APPEND THRUST_KNOWN_COMPUTE_ARCHS 90)
  endif()
  # sm_35 and sm_37 are only offered for NVCC releases older than 12.0.
  if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.0)
    list(APPEND THRUST_KNOWN_COMPUTE_ARCHS 35 37)
  endif()
else()
  # Any other CUDA compiler is offered the complete set regardless of version.
  list(APPEND THRUST_KNOWN_COMPUTE_ARCHS 35 37 90)
endif()
# Split CUDA_FLAGS into 3 parts:
#
# THRUST_CUDA_FLAGS_BASE: Common CUDA flags for all targets.
# THRUST_CUDA_FLAGS_RDC: Additional CUDA flags for targets compiled with RDC.
# THRUST_CUDA_FLAGS_NO_RDC: Additional CUDA flags for targets compiled without RDC.
#
# This is necessary because CUDA SMs 5.3, 6.2, and 7.2 do not support RDC, but
# we want to always build some targets (e.g. testing/cuda/*) with RDC.
# We work around this by building the "always RDC" targets without support for
# those SMs. This requires two sets of CUDA_FLAGS.
#
# Enabling any of those SMs along with the ENABLE_RDC options will result in a
# configuration error.
#
# Because of how CMake handles the CMAKE_CUDA_FLAGS variables, every target
# generated in a given directory will use the same value for CMAKE_CUDA_FLAGS,
# which is determined at the end of the directory's scope. This means caution
# should be used when trying to build different targets with different flags,
# since they might not behave as expected. This will improve with CMake 3.18,
# which adds the DEVICE_LINK genex, fixing the issue with using per-target
# CUDA_FLAGS: https://gitlab.kitware.com/cmake/cmake/-/issues/18265
# Architectures that do not support RDC (see the explanation above):
set(no_rdc_archs 53 62 72)

# The base flag set starts from whatever CMAKE_CUDA_FLAGS currently holds; the
# RDC / non-RDC additions start out unset and are filled in per architecture.
set(THRUST_CUDA_FLAGS_BASE "${CMAKE_CUDA_FLAGS}")
unset(THRUST_CUDA_FLAGS_RDC)
unset(THRUST_CUDA_FLAGS_NO_RDC)

# Sort the known architectures and take the last entry as the highest one.
list(SORT THRUST_KNOWN_COMPUTE_ARCHS)
list(LENGTH THRUST_KNOWN_COMPUTE_ARCHS num_known_archs)
math(EXPR max_idx "${num_known_archs} - 1")
list(GET THRUST_KNOWN_COMPUTE_ARCHS "${max_idx}" highest_arch)
# Opt-in detection of the compute architectures of the GPUs in this system.
# When enabled (and the compiler is not NVC++), a small CUDA program is
# compiled and run at configure time; its output is stored in `detected_archs`
# and consulted when the per-architecture options are declared.
option(THRUST_AUTO_DETECT_COMPUTE_ARCHS
  "If ON, compute architectures for all GPUs in the current system are enabled and all other compute architectures are disabled."
  OFF
)

if (THRUST_AUTO_DETECT_COMPUTE_ARCHS)
  if ("NVCXX" STREQUAL "${CMAKE_CUDA_COMPILER_ID}")
    message(STATUS "Thrust: Using NVC++ builtin automatic compute architecture detection.")
  else()
    set(detect_compute_archs_source "${Thrust_SOURCE_DIR}/cmake/detect_compute_archs.cu")
    set(detect_compute_archs_exe "${PROJECT_BINARY_DIR}/detect_compute_archs")
    set(detect_compute_archs_error_log "${PROJECT_BINARY_DIR}/detect_compute_archs.stderr.log")

    # Compile and immediately run (--run) the detection program. stdout is
    # captured as the detected list; stderr is written to the error log.
    execute_process(
      COMMAND "${CMAKE_CUDA_COMPILER}"
        -std=c++11
        -o "${detect_compute_archs_exe}"
        --run
        "${detect_compute_archs_source}"
      RESULT_VARIABLE detect_compute_archs_result
      OUTPUT_VARIABLE detected_archs
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_FILE "${detect_compute_archs_error_log}")

    # Surface a failed compile/run instead of silently reporting an empty
    # result. This is a warning, not an error: configuration continues with
    # whatever (possibly empty) output was captured, as it did before.
    if (NOT "${detect_compute_archs_result}" STREQUAL "0")
      message(WARNING
        "Thrust: Automatic compute architecture detection failed "
        "(result: ${detect_compute_archs_result}). "
        "See ${detect_compute_archs_error_log} for details."
      )
    endif()

    # Start from an empty message so a stale value is never appended to.
    set(detected_message "")
    if ("NONE" STREQUAL "${detected_archs}" OR "" STREQUAL "${detected_archs}")
      set(detected_message " none")
    else()
      foreach (arch IN LISTS detected_archs)
        string(APPEND detected_message " sm_${arch}")
      endforeach()
    endif()
    message(STATUS "Thrust: Automatically detected compute architectures:${detected_message}")
  endif()
endif()
# THRUST_DISABLE_ARCH_BY_DEFAULT defaults to ON for NVC++ and OFF otherwise.
if ("${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVCXX")
  set(option_init ON)
else()
  set(option_init OFF)
endif()
option(THRUST_DISABLE_ARCH_BY_DEFAULT
  "If ON, then all compute architectures are disabled on the initial CMake run."
  ${option_init}
)

# Initial value for the per-architecture options declared afterwards: OFF when
# architectures are disabled by default or are being auto-detected, ON
# otherwise.
if (THRUST_DISABLE_ARCH_BY_DEFAULT OR THRUST_AUTO_DETECT_COMPUTE_ARCHS)
  set(option_init OFF)
else()
  set(option_init ON)
endif()
# Declare one THRUST_ENABLE_COMPUTE_<arch> option per known architecture and
# collect the compiler-specific code generation flag of every enabled one.
set(num_archs_enabled 0)
foreach (arch IN LISTS THRUST_KNOWN_COMPUTE_ARCHS)
  # An architecture found by auto-detection is enabled by default no matter
  # what the general default is.
  if (arch IN_LIST detected_archs)
    set(arch_default ON)
  else()
    set(arch_default ${option_init})
  endif()
  option(THRUST_ENABLE_COMPUTE_${arch}
    "Enable code generation for tests for sm_${arch}"
    ${arch_default}
  )

  if (THRUST_ENABLE_COMPUTE_${arch})
    math(EXPR num_archs_enabled "${num_archs_enabled} + 1")

    # Spell the architecture flag the way the active CUDA compiler expects it.
    if ("${CMAKE_CUDA_COMPILER_ID}" STREQUAL "Clang")
      set(arch_flag "--cuda-gpu-arch=sm_${arch}")
    elseif ("${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVCXX")
      # Reject a second enabled architecture as soon as it is seen.
      if (NOT num_archs_enabled EQUAL 1)
        message(FATAL_ERROR
          "NVCXX does not support compilation for multiple device architectures "
          "at once."
        )
      endif()
      set(arch_flag "-gpu=cc${arch}")
    else()
      set(arch_flag "-gencode arch=compute_${arch},code=sm_${arch}")
    endif()

    # Every enabled arch goes into the non-RDC flags; the RDC flags skip the
    # architectures listed in no_rdc_archs.
    string(APPEND THRUST_CUDA_FLAGS_NO_RDC " ${arch_flag}")
    if (NOT arch IN_LIST no_rdc_archs)
      string(APPEND THRUST_CUDA_FLAGS_RDC " ${arch_flag}")
    endif()
    string(APPEND compute_message " sm_${arch}")
  endif()
endforeach()
# For every compiler except NVC++, optionally add code generation for the
# compute_<highest_arch> target in addition to the sm_XX targets.
if (NOT "${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVCXX")
  option(THRUST_ENABLE_COMPUTE_FUTURE
    "Enable code generation for tests for compute_${highest_arch}"
    ${option_init}
  )
  if (THRUST_ENABLE_COMPUTE_FUTURE)
    set(future_arch "compute_${highest_arch}")
    string(APPEND compute_message " ${future_arch}")
    string(APPEND THRUST_CUDA_FLAGS_BASE
      " -gencode arch=${future_arch},code=${future_arch}"
    )
  endif()
endif()

# Report everything that was explicitly switched on.
message(STATUS "Thrust: Explicitly enabled compute architectures:${compute_message}")
# NVCC builds without RDC unless asked to, while NVC++ builds with RDC and does
# not currently support turning it off. The defaults below follow each
# compiler: RDC off for NVCC, RDC on for NVC++.
if ("${CMAKE_CUDA_COMPILER_ID}" STREQUAL "NVCXX")
  set(option_init ON)
else()
  set(option_init OFF)
endif()

option(THRUST_ENABLE_TESTS_WITH_RDC
  "Build all Thrust tests with RDC; tests that require RDC are not affected by this option."
  ${option_init}
)

option(THRUST_ENABLE_EXAMPLES_WITH_RDC
  "Build all Thrust examples with RDC; examples which require RDC are not affected by this option."
  ${option_init}
)
# Validate the RDC options against the enabled architectures: an enabled arch
# from no_rdc_archs combined with a *_WITH_RDC option is a hard error; on its
# own it only produces a notice.
foreach (sm IN LISTS no_rdc_archs)
  set(sm_opt THRUST_ENABLE_COMPUTE_${sm})
  if (NOT ${sm_opt})
    # This architecture is not enabled, nothing to verify.
    continue()
  endif()

  foreach (rdc_opt IN ITEMS THRUST_ENABLE_TESTS_WITH_RDC THRUST_ENABLE_EXAMPLES_WITH_RDC)
    if (${rdc_opt})
      message(FATAL_ERROR
        "${rdc_opt} is incompatible with ${sm_opt}, since sm_${sm} does not "
        "support RDC."
      )
    endif()
  endforeach()

  message(NOTICE
    "sm_${sm} does not support RDC. Targets that require RDC will be built "
    "without support for this architecture."
  )
endforeach()
#
# Clang CUDA options
#
if ("${CMAKE_CUDA_COMPILER_ID}" STREQUAL "Clang")
  string(APPEND THRUST_CUDA_FLAGS_BASE
    " -Wno-unknown-cuda-version -Xclang=-fcuda-allow-variadic-functions"
  )
endif()

# RDC is not used by default: the directory-wide CUDA flags are the base flags
# followed by the non-RDC architecture flags.
set(CMAKE_CUDA_FLAGS "${THRUST_CUDA_FLAGS_BASE}")
string(APPEND CMAKE_CUDA_FLAGS " ${THRUST_CUDA_FLAGS_NO_RDC}")