| /****************************************************************************** | |
| * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions are met: | |
| * * Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * * Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in the | |
| * documentation and/or other materials provided with the distribution. | |
| * * Neither the name of the NVIDIA CORPORATION nor the | |
| * names of its contributors may be used to endorse or promote products | |
| * derived from this software without specific prior written permission. | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY | |
| * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * | |
| ******************************************************************************/ | |
| #pragma once | |
| #include <cub/config.cuh> | |
| #include <cub/device/dispatch/dispatch_merge_sort.cuh> | |
| #include <cub/util_deprecated.cuh> | |
| #include <cub/util_namespace.cuh> | |
| CUB_NAMESPACE_BEGIN | |
| /** | |
| * @brief DeviceMergeSort provides device-wide, parallel operations for | |
| * computing a merge sort across a sequence of data items residing within | |
| * device-accessible memory. | |
| * | |
| * @ingroup SingleModule | |
| * | |
| * @par Overview | |
| * - DeviceMergeSort arranges items into ascending order using a comparison | |
| * functor with less-than semantics. Merge sort can handle arbitrary types (as | |
| * long as a value of these types is a model of [LessThan Comparable]) and | |
| * comparison functors, but is slower than DeviceRadixSort when sorting | |
| * arithmetic types into ascending/descending order. | |
| * - Another difference from RadixSort is the fact that DeviceMergeSort can | |
| * handle arbitrary random-access iterators, as shown below. | |
| * | |
| * @par A Simple Example | |
| * @par | |
| * The code snippet below illustrates a thrust reverse iterator usage. | |
| * @par | |
| * @code | |
| * #include <cub/cub.cuh> // or equivalently <cub/device/device_merge_sort.cuh> | |
| * | |
| * struct CustomLess | |
| * { | |
| * template <typename DataType> | |
| * __device__ bool operator()(const DataType &lhs, const DataType &rhs) | |
| * { | |
| * return lhs < rhs; | |
| * } | |
| * }; | |
| * | |
| * // Declare, allocate, and initialize device-accessible pointers | |
| * // for sorting data | |
| * thrust::device_vector<KeyType> d_keys(num_items); | |
| * thrust::device_vector<DataType> d_values(num_items); | |
| * // ... | |
| * | |
| * // Initialize iterator | |
| * using KeyIterator = typename thrust::device_vector<KeyType>::iterator; | |
| * thrust::reverse_iterator<KeyIterator> reverse_iter(d_keys.end()); | |
| * | |
| * // Determine temporary device storage requirements | |
| * std::size_t temp_storage_bytes = 0; | |
| * cub::DeviceMergeSort::SortPairs( | |
| * nullptr, | |
| * temp_storage_bytes, | |
| * reverse_iter, | |
| * thrust::raw_pointer_cast(d_values.data()), | |
| * num_items, | |
| * CustomLess()); | |
| * | |
| * // Allocate temporary storage | |
| * cudaMalloc(&d_temp_storage, temp_storage_bytes); | |
| * | |
| * // Run sorting operation | |
| * cub::DeviceMergeSort::SortPairs( | |
| * d_temp_storage, | |
| * temp_storage_bytes, | |
| * reverse_iter, | |
| * thrust::raw_pointer_cast(d_values.data()), | |
| * num_items, | |
| * CustomLess()); | |
| * @endcode | |
| * | |
| * [LessThan Comparable]: https://en.cppreference.com/w/cpp/named_req/LessThanComparable | |
| */ | |
| struct DeviceMergeSort | |
| { | |
| /** | |
| * @brief Sorts items using a merge sorting method. | |
| * | |
| * @par | |
| * SortPairs is not guaranteed to be stable. That is, suppose that i and j are | |
| * equivalent: neither one is less than the other. It is not guaranteed | |
| * that the relative order of these two elements will be preserved by sort. | |
| * | |
| * @par Snippet | |
| * The code snippet below illustrates the sorting of a device vector of `int` | |
| * keys with associated vector of `int` values. | |
| * @par | |
| * @code | |
| * #include <cub/cub.cuh> | |
| * // or equivalently <cub/device/device_merge_sort.cuh> | |
| * | |
| * // Declare, allocate, and initialize device-accessible pointers for | |
| * // sorting data | |
| * int num_items; // e.g., 7 | |
| * int *d_keys; // e.g., [8, 6, 6, 5, 3, 0, 9] | |
| * int *d_values; // e.g., [0, 1, 2, 3, 4, 5, 6] | |
| * ... | |
| * | |
| * // Initialize comparator | |
| * CustomOpT custom_op; | |
| * | |
| * // Determine temporary device storage requirements | |
| * void *d_temp_storage = nullptr; | |
| * std::size_t temp_storage_bytes = 0; | |
| * cub::DeviceMergeSort::SortPairs( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, d_values, num_items, custom_op); | |
| * | |
| * // Allocate temporary storage | |
| * cudaMalloc(&d_temp_storage, temp_storage_bytes); | |
| * | |
| * // Run sorting operation | |
| * cub::DeviceMergeSort::SortPairs( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, d_values, num_items, custom_op); | |
| * | |
| * // d_keys <-- [0, 3, 5, 6, 6, 8, 9] | |
| * // d_values <-- [5, 4, 3, 2, 1, 0, 6] | |
| * | |
| * @endcode | |
| * | |
| * @tparam KeyIteratorT | |
| * is a model of [Random Access Iterator]. `KeyIteratorT` is mutable, and | |
| * its `value_type` is a model of [LessThan Comparable]. This `value_type`'s | |
| * ordering relation is a *strict weak ordering* as defined in | |
| * the [LessThan Comparable] requirements. | |
| * | |
| * @tparam ValueIteratorT | |
| * is a model of [Random Access Iterator], and `ValueIteratorT` is mutable. | |
| * | |
| * @tparam OffsetT | |
| * is an integer type for global offsets. | |
| * | |
| * @tparam CompareOpT | |
| * is a type of callable object with the signature | |
| * `bool operator()(KeyT lhs, KeyT rhs)` that models | |
| * the [Strict Weak Ordering] concept. | |
| * | |
| * @param[in] d_temp_storage | |
| * Device-accessible allocation of temporary storage. When `nullptr`, the | |
| * required allocation size is written to `temp_storage_bytes` and no work | |
| * is done. | |
| * | |
| * @param[in,out] temp_storage_bytes | |
| * Reference to size in bytes of `d_temp_storage` allocation | |
| * | |
| * @param[in,out] d_keys | |
| * Pointer to the input sequence of unsorted input keys | |
| * | |
| * @param[in,out] d_items | |
| * Pointer to the input sequence of unsorted input values | |
| * | |
| * @param[in] num_items | |
| * Number of items to sort | |
| * | |
| * @param[in] compare_op | |
| * Comparison function object which returns true if the first argument is | |
| * ordered before the second | |
| * | |
| * @param[in] stream | |
| * **[optional]** CUDA stream to launch kernels within. Default is | |
| * stream<sub>0</sub>. | |
| * | |
| * [Random Access Iterator]: https://en.cppreference.com/w/cpp/iterator/random_access_iterator | |
| * [Strict Weak Ordering]: https://en.cppreference.com/w/cpp/concepts/strict_weak_order | |
| * [LessThan Comparable]: https://en.cppreference.com/w/cpp/named_req/LessThanComparable | |
| */ | |
| template <typename KeyIteratorT, | |
| typename ValueIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| SortPairs(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyIteratorT d_keys, | |
| ValueIteratorT d_items, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream = 0) | |
| { | |
| using DispatchMergeSortT = DispatchMergeSort<KeyIteratorT, | |
| ValueIteratorT, | |
| KeyIteratorT, | |
| ValueIteratorT, | |
| OffsetT, | |
| CompareOpT>; | |
| return DispatchMergeSortT::Dispatch(d_temp_storage, | |
| temp_storage_bytes, | |
| d_keys, | |
| d_items, | |
| d_keys, | |
| d_items, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| template <typename KeyIteratorT, | |
| typename ValueIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| SortPairs(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyIteratorT d_keys, | |
| ValueIteratorT d_items, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream, | |
| bool debug_synchronous) | |
| { | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG | |
| return SortPairs<KeyIteratorT, ValueIteratorT, OffsetT, CompareOpT>( | |
| d_temp_storage, | |
| temp_storage_bytes, | |
| d_keys, | |
| d_items, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| /** | |
| * @brief Sorts items using a merge sorting method. | |
| * | |
| * @par | |
| * - SortPairsCopy is not guaranteed to be stable. That is, suppose | |
| * that `i` and `j` are equivalent: neither one is less than the | |
| * other. It is not guaranteed that the relative order of these | |
| * two elements will be preserved by sort. | |
| * - Input arrays `d_input_keys` and `d_input_items` are not modified. | |
| * - Note that the behavior is undefined if the input and output ranges | |
| * overlap in any way. | |
| * | |
| * @par Snippet | |
| * The code snippet below illustrates the sorting of a device vector of | |
| * `int` keys with associated vector of `int` values. | |
| * @par | |
| * @code | |
| * #include <cub/cub.cuh> | |
| * // or equivalently <cub/device/device_merge_sort.cuh> | |
| * | |
| * // Declare, allocate, and initialize device-accessible pointers | |
| * // for sorting data | |
| * int num_items; // e.g., 7 | |
| * int *d_keys; // e.g., [8, 6, 6, 5, 3, 0, 9] | |
| * int *d_values; // e.g., [0, 1, 2, 3, 4, 5, 6] | |
| * ... | |
| * | |
| * // Initialize comparator | |
| * CustomOpT custom_op; | |
| * | |
| * // Determine temporary device storage requirements | |
| * void *d_temp_storage = nullptr; | |
| * std::size_t temp_storage_bytes = 0; | |
| * cub::DeviceMergeSort::SortPairsCopy( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, d_values, num_items, custom_op); | |
| * | |
| * // Allocate temporary storage | |
| * cudaMalloc(&d_temp_storage, temp_storage_bytes); | |
| * | |
| * // Run sorting operation | |
| * cub::DeviceMergeSort::SortPairsCopy( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, d_values, num_items, custom_op); | |
| * | |
| * // d_keys <-- [0, 3, 5, 6, 6, 8, 9] | |
| * // d_values <-- [5, 4, 3, 2, 1, 0, 6] | |
| * | |
| * @endcode | |
| * | |
| * @tparam KeyInputIteratorT | |
| * is a model of [Random Access Iterator]. Its `value_type` is a model of | |
| * [LessThan Comparable]. This `value_type`'s ordering relation is a | |
| * *strict weak ordering* as defined in the [LessThan Comparable] | |
| * requirements. | |
| * | |
| * @tparam ValueInputIteratorT | |
| * is a model of [Random Access Iterator]. | |
| * | |
| * @tparam KeyIteratorT | |
| * is a model of [Random Access Iterator]. `KeyIteratorT` is mutable, and | |
| * its `value_type` is a model of [LessThan Comparable]. This `value_type`'s | |
| * ordering relation is a *strict weak ordering* as defined in | |
| * the [LessThan Comparable] requirements. | |
| * | |
| * @tparam ValueIteratorT | |
| * is a model of [Random Access Iterator], and `ValueIteratorT` is mutable. | |
| * | |
| * @tparam OffsetT | |
| * is an integer type for global offsets. | |
| * | |
| * @tparam CompareOpT | |
| * is a type of callable object with the signature | |
| * `bool operator()(KeyT lhs, KeyT rhs)` that models | |
| * the [Strict Weak Ordering] concept. | |
| * | |
| * @param[in] d_temp_storage | |
| * Device-accessible allocation of temporary storage. When `nullptr`, the | |
| * required allocation size is written to `temp_storage_bytes` and no work | |
| * is done. | |
| * | |
| * @param[in,out] temp_storage_bytes | |
| * Reference to size in bytes of `d_temp_storage` allocation | |
| * | |
| * @param[in] d_input_keys | |
| * Pointer to the input sequence of unsorted input keys | |
| * | |
| * @param[in] d_input_items | |
| * Pointer to the input sequence of unsorted input values | |
| * | |
| * @param[out] d_output_keys | |
| * Pointer to the output sequence of sorted input keys | |
| * | |
| * @param[out] d_output_items | |
| * Pointer to the output sequence of sorted input values | |
| * | |
| * @param[in] num_items | |
| * Number of items to sort | |
| * | |
| * @param[in] compare_op | |
| * Comparison function object which returns `true` if the first argument is | |
| * ordered before the second | |
| * | |
| * @param[in] stream | |
| * **[optional]** CUDA stream to launch kernels within. Default is | |
| * stream<sub>0</sub>. | |
| * | |
| * [Random Access Iterator]: https://en.cppreference.com/w/cpp/iterator/random_access_iterator | |
| * [Strict Weak Ordering]: https://en.cppreference.com/w/cpp/concepts/strict_weak_order | |
| * [LessThan Comparable]: https://en.cppreference.com/w/cpp/named_req/LessThanComparable | |
| */ | |
| template <typename KeyInputIteratorT, | |
| typename ValueInputIteratorT, | |
| typename KeyIteratorT, | |
| typename ValueIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| SortPairsCopy(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyInputIteratorT d_input_keys, | |
| ValueInputIteratorT d_input_items, | |
| KeyIteratorT d_output_keys, | |
| ValueIteratorT d_output_items, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream = 0) | |
| { | |
| using DispatchMergeSortT = DispatchMergeSort<KeyInputIteratorT, | |
| ValueInputIteratorT, | |
| KeyIteratorT, | |
| ValueIteratorT, | |
| OffsetT, | |
| CompareOpT>; | |
| return DispatchMergeSortT::Dispatch(d_temp_storage, | |
| temp_storage_bytes, | |
| d_input_keys, | |
| d_input_items, | |
| d_output_keys, | |
| d_output_items, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| template <typename KeyInputIteratorT, | |
| typename ValueInputIteratorT, | |
| typename KeyIteratorT, | |
| typename ValueIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| SortPairsCopy(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyInputIteratorT d_input_keys, | |
| ValueInputIteratorT d_input_items, | |
| KeyIteratorT d_output_keys, | |
| ValueIteratorT d_output_items, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream, | |
| bool debug_synchronous) | |
| { | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG | |
| return SortPairsCopy<KeyInputIteratorT, | |
| ValueInputIteratorT, | |
| KeyIteratorT, | |
| ValueIteratorT, | |
| OffsetT, | |
| CompareOpT>(d_temp_storage, | |
| temp_storage_bytes, | |
| d_input_keys, | |
| d_input_items, | |
| d_output_keys, | |
| d_output_items, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| /** | |
| * @brief Sorts items using a merge sorting method. | |
| * | |
| * @par | |
| * SortKeys is not guaranteed to be stable. That is, suppose that `i` and `j` | |
| * are equivalent: neither one is less than the other. It is not guaranteed | |
| * that the relative order of these two elements will be preserved by sort. | |
| * | |
| * @par Snippet | |
| * The code snippet below illustrates the sorting of a device vector of `int` | |
| * keys. | |
| * @par | |
| * @code | |
| * #include <cub/cub.cuh> | |
| * // or equivalently <cub/device/device_merge_sort.cuh> | |
| * | |
| * // Declare, allocate, and initialize device-accessible pointers | |
| * // for sorting data | |
| * int num_items; // e.g., 7 | |
| * int *d_keys; // e.g., [8, 6, 7, 5, 3, 0, 9] | |
| * ... | |
| * | |
| * // Initialize comparator | |
| * CustomOpT custom_op; | |
| * | |
| * // Determine temporary device storage requirements | |
| * void *d_temp_storage = nullptr; | |
| * std::size_t temp_storage_bytes = 0; | |
| * cub::DeviceMergeSort::SortKeys( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, num_items, custom_op); | |
| * | |
| * // Allocate temporary storage | |
| * cudaMalloc(&d_temp_storage, temp_storage_bytes); | |
| * | |
| * // Run sorting operation | |
| * cub::DeviceMergeSort::SortKeys( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, num_items, custom_op); | |
| * | |
| * // d_keys <-- [0, 3, 5, 6, 7, 8, 9] | |
| * @endcode | |
| * | |
| * @tparam KeyIteratorT | |
| * is a model of [Random Access Iterator]. `KeyIteratorT` is mutable, and | |
| * its `value_type` is a model of [LessThan Comparable]. This `value_type`'s | |
| * ordering relation is a *strict weak ordering* as defined in | |
| * the [LessThan Comparable] requirements. | |
| * | |
| * @tparam OffsetT | |
| * is an integer type for global offsets. | |
| * | |
| * @tparam CompareOpT | |
| * is a type of callable object with the signature | |
| * `bool operator()(KeyT lhs, KeyT rhs)` that models | |
| * the [Strict Weak Ordering] concept. | |
| * | |
| * @param[in] d_temp_storage | |
| * Device-accessible allocation of temporary storage. When `nullptr`, the | |
| * required allocation size is written to `temp_storage_bytes` and no work | |
| * is done. | |
| * | |
| * @param[in,out] temp_storage_bytes | |
| * Reference to size in bytes of `d_temp_storage` allocation | |
| * | |
| * @param[in,out] d_keys | |
| * Pointer to the input sequence of unsorted input keys | |
| * | |
| * @param[in] num_items | |
| * Number of items to sort | |
| * | |
| * @param[in] compare_op | |
| * Comparison function object which returns true if the first argument is | |
| * ordered before the second | |
| * | |
| * @param[in] stream | |
| * **[optional]** CUDA stream to launch kernels within. Default is | |
| * stream<sub>0</sub>. | |
| * | |
| * [Random Access Iterator]: https://en.cppreference.com/w/cpp/iterator/random_access_iterator | |
| * [Strict Weak Ordering]: https://en.cppreference.com/w/cpp/concepts/strict_weak_order | |
| * [LessThan Comparable]: https://en.cppreference.com/w/cpp/named_req/LessThanComparable | |
| */ | |
| template <typename KeyIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| SortKeys(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyIteratorT d_keys, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream = 0) | |
| { | |
| using DispatchMergeSortT = DispatchMergeSort<KeyIteratorT, | |
| NullType *, | |
| KeyIteratorT, | |
| NullType *, | |
| OffsetT, | |
| CompareOpT>; | |
| return DispatchMergeSortT::Dispatch(d_temp_storage, | |
| temp_storage_bytes, | |
| d_keys, | |
| static_cast<NullType *>(nullptr), | |
| d_keys, | |
| static_cast<NullType *>(nullptr), | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| template <typename KeyIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| SortKeys(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyIteratorT d_keys, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream, | |
| bool debug_synchronous) | |
| { | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG | |
| return SortKeys<KeyIteratorT, OffsetT, CompareOpT>(d_temp_storage, | |
| temp_storage_bytes, | |
| d_keys, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| /** | |
| * @brief Sorts items using a merge sorting method. | |
| * | |
| * @par | |
| * - SortKeysCopy is not guaranteed to be stable. That is, suppose that `i` | |
| * and `j` are equivalent: neither one is less than the other. It is not | |
| * guaranteed that the relative order of these two elements will be | |
| * preserved by sort. | |
| * - Input array d_input_keys is not modified. | |
| * - Note that the behavior is undefined if the input and output ranges | |
| * overlap in any way. | |
| * | |
| * @par Snippet | |
| * The code snippet below illustrates the sorting of a device vector of | |
| * `int` keys. | |
| * @par | |
| * @code | |
| * #include <cub/cub.cuh> | |
| * // or equivalently <cub/device/device_merge_sort.cuh> | |
| * | |
| * // Declare, allocate, and initialize device-accessible pointers for | |
| * // sorting data | |
| * int num_items; // e.g., 7 | |
| * int *d_keys; // e.g., [8, 6, 7, 5, 3, 0, 9] | |
| * ... | |
| * | |
| * // Initialize comparator | |
| * CustomOpT custom_op; | |
| * | |
| * // Determine temporary device storage requirements | |
| * void *d_temp_storage = nullptr; | |
| * std::size_t temp_storage_bytes = 0; | |
| * cub::DeviceMergeSort::SortKeysCopy( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, num_items, custom_op); | |
| * | |
| * // Allocate temporary storage | |
| * cudaMalloc(&d_temp_storage, temp_storage_bytes); | |
| * | |
| * // Run sorting operation | |
| * cub::DeviceMergeSort::SortKeysCopy( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, num_items, custom_op); | |
| * | |
| * // d_keys <-- [0, 3, 5, 6, 7, 8, 9] | |
| * @endcode | |
| * | |
| * @tparam KeyInputIteratorT | |
| * is a model of [Random Access Iterator]. Its `value_type` is a model of | |
| * [LessThan Comparable]. This `value_type`'s ordering relation is a | |
| * *strict weak ordering* as defined in the [LessThan Comparable] | |
| * requirements. | |
| * | |
| * @tparam KeyIteratorT | |
| * is a model of [Random Access Iterator]. `KeyIteratorT` is mutable, and | |
| * its `value_type` is a model of [LessThan Comparable]. This `value_type`'s | |
| * ordering relation is a *strict weak ordering* as defined in | |
| * the [LessThan Comparable] requirements. | |
| * | |
| * @tparam OffsetT | |
| * is an integer type for global offsets. | |
| * | |
| * @tparam CompareOpT | |
| * is a type of callable object with the signature | |
| * `bool operator()(KeyT lhs, KeyT rhs)` that models | |
| * the [Strict Weak Ordering] concept. | |
| * | |
| * @param[in] d_temp_storage | |
| * Device-accessible allocation of temporary storage. When `nullptr`, the | |
| * required allocation size is written to `temp_storage_bytes` and no work | |
| * is done. | |
| * | |
| * @param[in,out] temp_storage_bytes | |
| * Reference to size in bytes of `d_temp_storage` allocation | |
| * | |
| * @param[in] d_input_keys | |
| * Pointer to the input sequence of unsorted input keys | |
| * | |
| * @param[out] d_output_keys | |
| * Pointer to the output sequence of sorted input keys | |
| * | |
| * @param[in] num_items | |
| * Number of items to sort | |
| * | |
| * @param[in] compare_op | |
| * Comparison function object which returns true if the first argument is | |
| * ordered before the second | |
| * | |
| * @param[in] stream | |
| * **[optional]** CUDA stream to launch kernels within. Default is | |
| * stream<sub>0</sub>. | |
| * | |
| * [Random Access Iterator]: https://en.cppreference.com/w/cpp/iterator/random_access_iterator | |
| * [Strict Weak Ordering]: https://en.cppreference.com/w/cpp/concepts/strict_weak_order | |
| * [LessThan Comparable]: https://en.cppreference.com/w/cpp/named_req/LessThanComparable | |
| */ | |
| template <typename KeyInputIteratorT, | |
| typename KeyIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| SortKeysCopy(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyInputIteratorT d_input_keys, | |
| KeyIteratorT d_output_keys, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream = 0) | |
| { | |
| using DispatchMergeSortT = DispatchMergeSort<KeyInputIteratorT, | |
| NullType *, | |
| KeyIteratorT, | |
| NullType *, | |
| OffsetT, | |
| CompareOpT>; | |
| return DispatchMergeSortT::Dispatch(d_temp_storage, | |
| temp_storage_bytes, | |
| d_input_keys, | |
| static_cast<NullType *>(nullptr), | |
| d_output_keys, | |
| static_cast<NullType *>(nullptr), | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| template <typename KeyInputIteratorT, | |
| typename KeyIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| SortKeysCopy(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyInputIteratorT d_input_keys, | |
| KeyIteratorT d_output_keys, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream, | |
| bool debug_synchronous) | |
| { | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG | |
| return SortKeysCopy<KeyInputIteratorT, KeyIteratorT, OffsetT, CompareOpT>( | |
| d_temp_storage, | |
| temp_storage_bytes, | |
| d_input_keys, | |
| d_output_keys, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| /** | |
| * @brief Sorts items using a merge sorting method. | |
| * | |
| * @par | |
| * StableSortPairs is stable: it preserves the relative ordering of equivalent | |
| * elements. That is, if x and y are elements such that x precedes y, | |
| * and if the two elements are equivalent (neither x < y nor y < x) then | |
| * a postcondition of stable_sort is that x still precedes y. | |
| * | |
| * @par Snippet | |
| * The code snippet below illustrates the sorting of a device vector of `int` | |
| * keys with associated vector of `int` values. | |
| * @par | |
| * @code | |
| * #include <cub/cub.cuh> | |
| * // or equivalently <cub/device/device_merge_sort.cuh> | |
| * | |
| * // Declare, allocate, and initialize device-accessible pointers for | |
| * // sorting data | |
| * int num_items; // e.g., 7 | |
| * int *d_keys; // e.g., [8, 6, 6, 5, 3, 0, 9] | |
| * int *d_values; // e.g., [0, 1, 2, 3, 4, 5, 6] | |
| * ... | |
| * | |
| * // Initialize comparator | |
| * CustomOpT custom_op; | |
| * | |
| * // Determine temporary device storage requirements | |
| * void *d_temp_storage = nullptr; | |
| * std::size_t temp_storage_bytes = 0; | |
| * cub::DeviceMergeSort::StableSortPairs( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, d_values, num_items, custom_op); | |
| * | |
| * // Allocate temporary storage | |
| * cudaMalloc(&d_temp_storage, temp_storage_bytes); | |
| * | |
| * // Run sorting operation | |
| * cub::DeviceMergeSort::StableSortPairs( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, d_values, num_items, custom_op); | |
| * | |
| * // d_keys <-- [0, 3, 5, 6, 6, 8, 9] | |
| * // d_values <-- [5, 4, 3, 1, 2, 0, 6] | |
| * @endcode | |
| * | |
| * @tparam KeyIteratorT | |
| * is a model of [Random Access Iterator]. `KeyIteratorT` is mutable, and | |
| * its `value_type` is a model of [LessThan Comparable]. This `value_type`'s | |
| * ordering relation is a *strict weak ordering* as defined in | |
| * the [LessThan Comparable] requirements. | |
| * | |
| * @tparam ValueIteratorT | |
| * is a model of [Random Access Iterator], and `ValueIteratorT` is mutable. | |
| * | |
| * @tparam OffsetT | |
| * is an integer type for global offsets. | |
| * | |
| * @tparam CompareOpT | |
| * is a type of callable object with the signature | |
| * `bool operator()(KeyT lhs, KeyT rhs)` that models | |
| * the [Strict Weak Ordering] concept. | |
| * | |
| * @param[in] d_temp_storage | |
| * Device-accessible allocation of temporary storage. When `nullptr`, the | |
| * required allocation size is written to `temp_storage_bytes` and no work | |
| * is done. | |
| * | |
| * @param[in,out] temp_storage_bytes | |
| * Reference to size in bytes of `d_temp_storage` allocation | |
| * | |
| * @param[in,out] d_keys | |
| * Pointer to the input sequence of unsorted input keys | |
| * | |
| * @param[in,out] d_items | |
| * Pointer to the input sequence of unsorted input values | |
| * | |
| * @param[in] num_items | |
| * Number of items to sort | |
| * | |
| * @param[in] compare_op | |
| * Comparison function object which returns true if the first argument is | |
| * ordered before the second | |
| * | |
| * @param[in] stream | |
| * **[optional]** CUDA stream to launch kernels within. Default is | |
| * stream<sub>0</sub>. | |
| * | |
| * [Random Access Iterator]: https://en.cppreference.com/w/cpp/iterator/random_access_iterator | |
| * [Strict Weak Ordering]: https://en.cppreference.com/w/cpp/concepts/strict_weak_order | |
| * [LessThan Comparable]: https://en.cppreference.com/w/cpp/named_req/LessThanComparable | |
| */ | |
| template <typename KeyIteratorT, | |
| typename ValueIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| StableSortPairs(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyIteratorT d_keys, | |
| ValueIteratorT d_items, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream = 0) | |
| { | |
| return SortPairs<KeyIteratorT, ValueIteratorT, OffsetT, CompareOpT>( | |
| d_temp_storage, | |
| temp_storage_bytes, | |
| d_keys, | |
| d_items, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| template <typename KeyIteratorT, | |
| typename ValueIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| StableSortPairs(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyIteratorT d_keys, | |
| ValueIteratorT d_items, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream, | |
| bool debug_synchronous) | |
| { | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG | |
| return StableSortPairs<KeyIteratorT, ValueIteratorT, OffsetT, CompareOpT>( | |
| d_temp_storage, | |
| temp_storage_bytes, | |
| d_keys, | |
| d_items, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| /** | |
| * @brief Sorts items using a merge sorting method. | |
| * | |
| * @par | |
| * StableSortKeys is stable: it preserves the relative ordering of equivalent | |
| * elements. That is, if `x` and `y` are elements such that `x` precedes `y`, | |
| * and if the two elements are equivalent (neither `x < y` nor `y < x`) then | |
| * a postcondition of stable_sort is that `x` still precedes `y`. | |
| * | |
| * @par Snippet | |
| * The code snippet below illustrates the sorting of a device vector of `int` | |
| * keys. | |
| * \par | |
| * \code | |
| * #include <cub/cub.cuh> | |
| * // or equivalently <cub/device/device_merge_sort.cuh> | |
| * | |
| * // Declare, allocate, and initialize device-accessible pointers for | |
| * // sorting data | |
| * int num_items; // e.g., 7 | |
| * int *d_keys; // e.g., [8, 6, 7, 5, 3, 0, 9] | |
| * ... | |
| * | |
| * // Initialize comparator | |
| * CustomOpT custom_op; | |
| * | |
| * // Determine temporary device storage requirements | |
| * void *d_temp_storage = nullptr; | |
| * std::size_t temp_storage_bytes = 0; | |
| * cub::DeviceMergeSort::StableSortKeys( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, num_items, custom_op); | |
| * | |
| * // Allocate temporary storage | |
| * cudaMalloc(&d_temp_storage, temp_storage_bytes); | |
| * | |
| * // Run sorting operation | |
| * cub::DeviceMergeSort::StableSortKeys( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_keys, num_items, custom_op); | |
| * | |
| * // d_keys <-- [0, 3, 5, 6, 7, 8, 9] | |
| * @endcode | |
| * | |
| * @tparam KeyIteratorT | |
| * is a model of [Random Access Iterator]. `KeyIteratorT` is mutable, and | |
| * its `value_type` is a model of [LessThan Comparable]. This `value_type`'s | |
| * ordering relation is a *strict weak ordering* as defined in | |
| * the [LessThan Comparable] requirements. | |
| * | |
| * @tparam OffsetT | |
| * is an integer type for global offsets. | |
| * | |
| * @tparam CompareOpT | |
| * is a type of callable object with the signature | |
| * `bool operator()(KeyT lhs, KeyT rhs)` that models | |
| * the [Strict Weak Ordering] concept. | |
| * | |
| * @param[in] d_temp_storage | |
| * Device-accessible allocation of temporary storage. When `nullptr`, the | |
| * required allocation size is written to `temp_storage_bytes` and no work | |
| * is done. | |
| * | |
| * @param[in,out] temp_storage_bytes | |
| * Reference to size in bytes of `d_temp_storage` allocation | |
| * | |
| * @param[in,out] d_keys | |
| * Pointer to the input sequence of unsorted input keys | |
| * | |
| * @param[in] num_items | |
| * Number of items to sort | |
| * | |
| * @param[in] compare_op | |
| * Comparison function object which returns true if the first argument is | |
| * ordered before the second | |
| * | |
| * @param[in] stream | |
| * **[optional]** CUDA stream to launch kernels within. Default is | |
| * stream<sub>0</sub>. | |
| * | |
| * [Random Access Iterator]: https://en.cppreference.com/w/cpp/iterator/random_access_iterator | |
| * [Strict Weak Ordering]: https://en.cppreference.com/w/cpp/concepts/strict_weak_order | |
| * [LessThan Comparable]: https://en.cppreference.com/w/cpp/named_req/LessThanComparable | |
| */ | |
| template <typename KeyIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| StableSortKeys(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyIteratorT d_keys, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream = 0) | |
| { | |
| return SortKeys<KeyIteratorT, OffsetT, CompareOpT>(d_temp_storage, | |
| temp_storage_bytes, | |
| d_keys, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| template <typename KeyIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| StableSortKeys(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyIteratorT d_keys, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream, | |
| bool debug_synchronous) | |
| { | |
| CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG | |
| return StableSortKeys<KeyIteratorT, OffsetT, CompareOpT>(d_temp_storage, | |
| temp_storage_bytes, | |
| d_keys, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| /** | |
| * @brief Sorts items using a merge sorting method. | |
| * | |
| * @par | |
| * - StableSortKeysCopy is stable: it preserves the relative ordering of equivalent | |
| * elements. That is, if `x` and `y` are elements such that `x` precedes `y`, | |
| * and if the two elements are equivalent (neither `x < y` nor `y < x`) then | |
| * a postcondition of stable_sort is that `x` still precedes `y`. | |
| * - Input array d_input_keys is not modified | |
| * - Note that the behavior is undefined if the input and output ranges overlap | |
| * in any way. | |
| * | |
| * @par Snippet | |
| * The code snippet below illustrates the sorting of a device vector of `int` | |
| * keys. | |
| * \par | |
| * \code | |
| * #include <cub/cub.cuh> | |
| * // or equivalently <cub/device/device_merge_sort.cuh> | |
| * | |
| * // Declare, allocate, and initialize device-accessible pointers for | |
| * // sorting data | |
| * int num_items; // e.g., 7 | |
| * int *d_input_keys; // e.g., [8, 6, 7, 5, 3, 0, 9] | |
| * int *d_output_keys; // must hold at least num_items elements | |
| * ... | |
| * | |
| * // Initialize comparator | |
| * CustomOpT custom_op; | |
| * | |
| * // Determine temporary device storage requirements | |
| * void *d_temp_storage = nullptr; | |
| * std::size_t temp_storage_bytes = 0; | |
| * cub::DeviceMergeSort::StableSortKeysCopy( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_input_keys, d_output_keys, num_items, custom_op); | |
| * | |
| * // Allocate temporary storage | |
| * cudaMalloc(&d_temp_storage, temp_storage_bytes); | |
| * | |
| * // Run sorting operation | |
| * cub::DeviceMergeSort::StableSortKeysCopy( | |
| * d_temp_storage, temp_storage_bytes, | |
| * d_input_keys, d_output_keys, num_items, custom_op); | |
| * | |
| * // d_output_keys <-- [0, 3, 5, 6, 7, 8, 9] | |
| * @endcode | |
| * | |
| * @tparam KeyInputIteratorT | |
| * is a model of [Random Access Iterator]. Its `value_type` is a model of | |
| * [LessThan Comparable]. This `value_type`'s ordering relation is a | |
| * *strict weak ordering* as defined in the [LessThan Comparable] | |
| * requirements. | |
| * | |
| * @tparam KeyIteratorT | |
| * is a model of [Random Access Iterator]. `KeyIteratorT` is mutable, and | |
| * its `value_type` is a model of [LessThan Comparable]. This `value_type`'s | |
| * ordering relation is a *strict weak ordering* as defined in | |
| * the [LessThan Comparable] requirements. | |
| * | |
| * @tparam OffsetT | |
| * is an integer type for global offsets. | |
| * | |
| * @tparam CompareOpT | |
| * is a type of callable object with the signature | |
| * `bool operator()(KeyT lhs, KeyT rhs)` that models | |
| * the [Strict Weak Ordering] concept. | |
| * | |
| * @param[in] d_temp_storage | |
| * Device-accessible allocation of temporary storage. When `nullptr`, the | |
| * required allocation size is written to `temp_storage_bytes` and no work | |
| * is done. | |
| * | |
| * @param[in,out] temp_storage_bytes | |
| * Reference to size in bytes of `d_temp_storage` allocation | |
| * | |
| * @param[in] d_input_keys | |
| * Pointer to the input sequence of unsorted input keys | |
| * | |
| * @param[out] d_output_keys | |
| * Pointer to the output sequence of sorted input keys | |
| * | |
| * @param[in] num_items | |
| * Number of elements in d_input_keys to sort | |
| * | |
| * @param[in] compare_op | |
| * Comparison function object which returns true if the first argument is | |
| * ordered before the second | |
| * | |
| * @param[in] stream | |
| * **[optional]** CUDA stream to launch kernels within. Default is | |
| * stream<sub>0</sub>. | |
| * | |
| * [Random Access Iterator]: https://en.cppreference.com/w/cpp/iterator/random_access_iterator | |
| * [Strict Weak Ordering]: https://en.cppreference.com/w/cpp/concepts/strict_weak_order | |
| * [LessThan Comparable]: https://en.cppreference.com/w/cpp/named_req/LessThanComparable | |
| */ | |
| template <typename KeyInputIteratorT, | |
| typename KeyIteratorT, | |
| typename OffsetT, | |
| typename CompareOpT> | |
| CUB_RUNTIME_FUNCTION static cudaError_t | |
| StableSortKeysCopy(void *d_temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| KeyInputIteratorT d_input_keys, | |
| KeyIteratorT d_output_keys, | |
| OffsetT num_items, | |
| CompareOpT compare_op, | |
| cudaStream_t stream = 0) | |
| { | |
| return SortKeysCopy<KeyInputIteratorT, KeyIteratorT, OffsetT, CompareOpT>(d_temp_storage, | |
| temp_storage_bytes, | |
| d_input_keys, | |
| d_output_keys, | |
| num_items, | |
| compare_op, | |
| stream); | |
| } | |
| }; | |
| CUB_NAMESPACE_END | |