| /****************************************************************************** | |
| * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions are met: | |
| * * Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * * Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in the | |
| * documentation and/or other materials provided with the distribution. | |
| * * Neither the name of the NVIDIA CORPORATION nor the | |
| * names of its contributors may be used to endorse or promote products | |
| * derived from this software without specific prior written permission. | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY | |
| * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * | |
| ******************************************************************************/ | |
| // Ensure printing of CUDA runtime errors to console | |
| #define CUB_STDERR | |
| #include <cub/device/device_adjacent_difference.cuh> | |
| #include <cub/thread/thread_operators.cuh> | |
| #include <cub/util_allocator.cuh> | |
| #include <thrust/count.h> | |
| #include <thrust/device_vector.h> | |
| #include <thrust/fill.h> | |
| #include <thrust/host_vector.h> | |
| #include <thrust/iterator/constant_iterator.h> | |
| #include <thrust/iterator/counting_iterator.h> | |
| #include <thrust/random.h> | |
| #include <thrust/sequence.h> | |
| #include <thrust/shuffle.h> | |
| #include <limits> | |
| #include <memory> | |
| #include "test_util.h" | |
| using namespace cub; | |
| constexpr bool READ_LEFT = true; | |
| constexpr bool READ_RIGHT = false; | |
| /** | |
| * \brief Generates integer sequence \f$S_n=i(i-1)/2\f$. | |
| * | |
| * The adjacent difference of this sequence produce consecutive numbers: | |
| * \f[ | |
| * p = \frac{i(i - 1)}{2} \\ | |
| * n = \frac{(i + 1) i}{2} \\ | |
| * n - p = i \\ | |
| * \frac{(i + 1) i}{2} - \frac{i (i - 1)}{2} = i \\ | |
| * (i + 1) i - i (i - 1) = 2 i \\ | |
| * (i + 1) - (i - 1) = 2 \\ | |
| * 2 = 2 | |
| * \f] | |
| */ | |
| template <typename DestT> | |
| struct TestSequenceGenerator | |
| { | |
| template <typename SourceT> | |
| __device__ __host__ DestT operator()(SourceT index) const | |
| { | |
| return static_cast<DestT>(index * (index - 1) / SourceT(2)); | |
| } | |
| }; | |
| template <typename OutputT> | |
| struct CustomDifference | |
| { | |
| template <typename InputT> | |
| __device__ OutputT operator()(const InputT &lhs, const InputT &rhs) | |
| { | |
| return static_cast<OutputT>(lhs - rhs); | |
| } | |
| }; | |
| template <bool ReadLeft, | |
| typename IteratorT, | |
| typename DifferenceOpT, | |
| typename NumItemsT> | |
| void AdjacentDifference(void *temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| IteratorT it, | |
| DifferenceOpT difference_op, | |
| NumItemsT num_items) | |
| { | |
| const bool is_default_op_in_use = | |
| std::is_same<DifferenceOpT, cub::Difference>::value; | |
| if (ReadLeft) | |
| { | |
| if (is_default_op_in_use) | |
| { | |
| CubDebugExit( | |
| cub::DeviceAdjacentDifference::SubtractLeft(temp_storage, | |
| temp_storage_bytes, | |
| it, | |
| num_items)); | |
| } | |
| else | |
| { | |
| CubDebugExit( | |
| cub::DeviceAdjacentDifference::SubtractLeft(temp_storage, | |
| temp_storage_bytes, | |
| it, | |
| num_items, | |
| difference_op)); | |
| } | |
| } | |
| else | |
| { | |
| if (is_default_op_in_use) | |
| { | |
| CubDebugExit( | |
| cub::DeviceAdjacentDifference::SubtractRight(temp_storage, | |
| temp_storage_bytes, | |
| it, | |
| num_items)); | |
| } | |
| else | |
| { | |
| CubDebugExit( | |
| cub::DeviceAdjacentDifference::SubtractRight(temp_storage, | |
| temp_storage_bytes, | |
| it, | |
| num_items, | |
| difference_op)); | |
| } | |
| } | |
| } | |
| template <bool ReadLeft, | |
| typename InputIteratorT, | |
| typename OutputIteratorT, | |
| typename DifferenceOpT, | |
| typename NumItemsT> | |
| void AdjacentDifferenceCopy(void *temp_storage, | |
| std::size_t &temp_storage_bytes, | |
| InputIteratorT input, | |
| OutputIteratorT output, | |
| DifferenceOpT difference_op, | |
| NumItemsT num_items) | |
| { | |
| const bool is_default_op_in_use = | |
| std::is_same<DifferenceOpT, cub::Difference>::value; | |
| if (ReadLeft) | |
| { | |
| if (is_default_op_in_use) | |
| { | |
| CubDebugExit( | |
| cub::DeviceAdjacentDifference::SubtractLeftCopy(temp_storage, | |
| temp_storage_bytes, | |
| input, | |
| output, | |
| num_items)); | |
| } | |
| else | |
| { | |
| CubDebugExit( | |
| cub::DeviceAdjacentDifference::SubtractLeftCopy(temp_storage, | |
| temp_storage_bytes, | |
| input, | |
| output, | |
| num_items, | |
| difference_op)); | |
| } | |
| } | |
| else | |
| { | |
| if (is_default_op_in_use) | |
| { | |
| CubDebugExit( | |
| cub::DeviceAdjacentDifference::SubtractRightCopy(temp_storage, | |
| temp_storage_bytes, | |
| input, | |
| output, | |
| num_items)); | |
| } | |
| else | |
| { | |
| CubDebugExit( | |
| cub::DeviceAdjacentDifference::SubtractRightCopy(temp_storage, | |
| temp_storage_bytes, | |
| input, | |
| output, | |
| num_items, | |
| difference_op)); | |
| } | |
| } | |
| } | |
| template <bool ReadLeft, | |
| typename IteratorT, | |
| typename DifferenceOpT, | |
| typename NumItemsT> | |
| void AdjacentDifference(IteratorT it, | |
| DifferenceOpT difference_op, | |
| NumItemsT num_items) | |
| { | |
| std::size_t temp_storage_bytes {}; | |
| AdjacentDifference<ReadLeft>(nullptr, | |
| temp_storage_bytes, | |
| it, | |
| difference_op, | |
| num_items); | |
| thrust::device_vector<std::uint8_t> temp_storage(temp_storage_bytes); | |
| AdjacentDifference<ReadLeft>(thrust::raw_pointer_cast(temp_storage.data()), | |
| temp_storage_bytes, | |
| it, | |
| difference_op, | |
| num_items); | |
| } | |
| template <bool ReadLeft, | |
| typename InputIteratorT, | |
| typename OutputIteratorT, | |
| typename DifferenceOpT, | |
| typename NumItemsT> | |
| void AdjacentDifferenceCopy(InputIteratorT input, | |
| OutputIteratorT output, | |
| DifferenceOpT difference_op, | |
| NumItemsT num_items) | |
| { | |
| std::size_t temp_storage_bytes{}; | |
| AdjacentDifferenceCopy<ReadLeft>(nullptr, | |
| temp_storage_bytes, | |
| input, | |
| output, | |
| difference_op, | |
| num_items); | |
| thrust::device_vector<std::uint8_t> temp_storage(temp_storage_bytes); | |
| AdjacentDifferenceCopy<ReadLeft>(thrust::raw_pointer_cast( | |
| temp_storage.data()), | |
| temp_storage_bytes, | |
| input, | |
| output, | |
| difference_op, | |
| num_items); | |
| } | |
| template <typename FirstIteratorT, | |
| typename SecondOperatorT> | |
| bool CheckResult(FirstIteratorT first_begin, | |
| FirstIteratorT first_end, | |
| SecondOperatorT second_begin) | |
| { | |
| auto err = thrust::mismatch(first_begin, first_end, second_begin); | |
| if (err.first != first_end) | |
| { | |
| return false; | |
| } | |
| return true; | |
| } | |
| template <typename InputT, | |
| typename OutputT, | |
| typename DifferenceOpT, | |
| typename NumItemsT> | |
| void TestCopy(NumItemsT elements, DifferenceOpT difference_op) | |
| { | |
| thrust::device_vector<InputT> input(elements); | |
| thrust::tabulate(input.begin(), | |
| input.end(), | |
| TestSequenceGenerator<InputT>{}); | |
| thrust::device_vector<OutputT> output(elements, OutputT{42}); | |
| InputT *d_input = thrust::raw_pointer_cast(input.data()); | |
| OutputT *d_output = thrust::raw_pointer_cast(output.data()); | |
| using CountingIteratorT = | |
| typename thrust::counting_iterator<OutputT, | |
| thrust::use_default, | |
| std::size_t, | |
| std::size_t>; | |
| AdjacentDifferenceCopy<READ_LEFT>(d_input, | |
| d_output, | |
| difference_op, | |
| elements); | |
| AssertTrue(CheckResult(output.begin() + 1, | |
| output.end(), | |
| CountingIteratorT(OutputT{0}))); | |
| thrust::fill(output.begin(), output.end(), OutputT{42}); | |
| AdjacentDifferenceCopy<READ_RIGHT>(d_input, | |
| d_output, | |
| difference_op, | |
| elements); | |
| thrust::device_vector<OutputT> reference(input.size()); | |
| thrust::sequence(reference.begin(), | |
| reference.end(), | |
| static_cast<OutputT>(0), | |
| static_cast<OutputT>(-1)); | |
| AssertTrue(CheckResult(output.begin(), | |
| output.end() - 1, | |
| reference.begin())); | |
| } | |
| template <typename InputT, | |
| typename OutputT, | |
| typename DifferenceOpT, | |
| typename NumItemsT> | |
| void TestIteratorCopy(NumItemsT elements, DifferenceOpT difference_op) | |
| { | |
| thrust::device_vector<InputT> input(elements); | |
| thrust::tabulate(input.begin(), | |
| input.end(), | |
| TestSequenceGenerator<InputT>{}); | |
| thrust::device_vector<OutputT> output(elements, OutputT{42}); | |
| using CountingIteratorT = | |
| typename thrust::counting_iterator<OutputT, | |
| thrust::use_default, | |
| std::size_t, | |
| std::size_t>; | |
| AdjacentDifferenceCopy<READ_LEFT>(input.cbegin(), | |
| output.begin(), | |
| difference_op, | |
| elements); | |
| AssertTrue(CheckResult(output.begin() + 1, | |
| output.end(), | |
| CountingIteratorT(OutputT{0}))); | |
| thrust::fill(output.begin(), output.end(), OutputT{42}); | |
| AdjacentDifferenceCopy<READ_RIGHT>(input.cbegin(), | |
| output.begin(), | |
| difference_op, | |
| elements); | |
| thrust::device_vector<OutputT> reference(input.size()); | |
| thrust::sequence(reference.begin(), | |
| reference.end(), | |
| static_cast<OutputT>(0), | |
| static_cast<OutputT>(-1)); | |
| AssertTrue(CheckResult(output.begin(), | |
| output.end() - 1, | |
| reference.begin())); | |
| } | |
| template <typename InputT, | |
| typename OutputT, | |
| typename NumItemsT> | |
| void TestCopy(NumItemsT elements) | |
| { | |
| TestCopy<InputT, OutputT>(elements, cub::Difference{}); | |
| TestCopy<InputT, OutputT>(elements, CustomDifference<OutputT>{}); | |
| TestIteratorCopy<InputT, OutputT>(elements, cub::Difference{}); | |
| TestIteratorCopy<InputT, OutputT>(elements, CustomDifference<OutputT>{}); | |
| } | |
| template <typename NumItemsT> | |
| void TestCopy(NumItemsT elements) | |
| { | |
| TestCopy<std::uint64_t, std::int64_t >(elements); | |
| TestCopy<std::uint32_t, std::int32_t>(elements); | |
| } | |
| template <typename T, | |
| typename DifferenceOpT, | |
| typename NumItemsT> | |
| void Test(NumItemsT elements, DifferenceOpT difference_op) | |
| { | |
| thrust::device_vector<T> data(elements); | |
| thrust::tabulate(data.begin(), | |
| data.end(), | |
| TestSequenceGenerator<T>{}); | |
| T *d_data = thrust::raw_pointer_cast(data.data()); | |
| using CountingIteratorT = | |
| typename thrust::counting_iterator<T, | |
| thrust::use_default, | |
| std::size_t, | |
| std::size_t>; | |
| AdjacentDifference<READ_LEFT>(d_data, | |
| difference_op, | |
| elements); | |
| AssertTrue(CheckResult(data.begin() + 1, | |
| data.end(), | |
| CountingIteratorT(T{0}))); | |
| thrust::tabulate(data.begin(), | |
| data.end(), | |
| TestSequenceGenerator<T>{}); | |
| AdjacentDifference<READ_RIGHT>(d_data, | |
| difference_op, | |
| elements); | |
| thrust::device_vector<T> reference(data.size()); | |
| thrust::sequence(reference.begin(), | |
| reference.end(), | |
| static_cast<T>(0), | |
| static_cast<T>(-1)); | |
| AssertTrue(CheckResult(data.begin(), | |
| data.end() - 1, | |
| reference.begin())); | |
| } | |
| template <typename T, | |
| typename DifferenceOpT, | |
| typename NumItemsT> | |
| void TestIterators(NumItemsT elements, DifferenceOpT difference_op) | |
| { | |
| thrust::device_vector<T> data(elements); | |
| thrust::tabulate(data.begin(), | |
| data.end(), | |
| TestSequenceGenerator<T>{}); | |
| using CountingIteratorT = | |
| typename thrust::counting_iterator<T, | |
| thrust::use_default, | |
| std::size_t, | |
| std::size_t>; | |
| AdjacentDifference<READ_LEFT>(data.begin(), | |
| difference_op, | |
| elements); | |
| AssertTrue(CheckResult(data.begin() + 1, | |
| data.end(), | |
| CountingIteratorT(T{0}))); | |
| thrust::tabulate(data.begin(), | |
| data.end(), | |
| TestSequenceGenerator<T>{}); | |
| AdjacentDifference<READ_RIGHT>(data.begin(), | |
| difference_op, | |
| elements); | |
| thrust::device_vector<T> reference(data.size()); | |
| thrust::sequence(reference.begin(), | |
| reference.end(), | |
| static_cast<T>(0), | |
| static_cast<T>(-1)); | |
| AssertTrue(CheckResult(data.begin(), data.end() - 1, reference.begin())); | |
| } | |
| template <typename T, | |
| typename NumItemsT> | |
| void Test(NumItemsT elements) | |
| { | |
| Test<T>(elements, cub::Difference{}); | |
| Test<T>(elements, CustomDifference<T>{}); | |
| TestIterators<T>(elements, cub::Difference{}); | |
| TestIterators<T>(elements, CustomDifference<T>{}); | |
| } | |
| template <typename NumItemsT> | |
| void Test(NumItemsT elements) | |
| { | |
| Test<std::int32_t, NumItemsT>(elements); | |
| Test<std::uint32_t, NumItemsT>(elements); | |
| Test<std::uint64_t, NumItemsT>(elements); | |
| } | |
| template <typename ValueT, | |
| typename NumItemsT> | |
| void TestFancyIterators(NumItemsT elements) | |
| { | |
| if (elements == 0) | |
| { | |
| return; | |
| } | |
| thrust::counting_iterator<ValueT> count_iter(ValueT{1}); | |
| thrust::device_vector<ValueT> output(elements, ValueT{42}); | |
| AdjacentDifferenceCopy<READ_LEFT>(count_iter, | |
| output.begin(), | |
| cub::Difference{}, | |
| elements); | |
| AssertEquals(elements, | |
| static_cast<NumItemsT>( | |
| thrust::count(output.begin(), output.end(), ValueT(1)))); | |
| thrust::fill(output.begin(), output.end(), ValueT{}); | |
| AdjacentDifferenceCopy<READ_RIGHT>(count_iter, | |
| output.begin(), | |
| cub::Difference{}, | |
| elements); | |
| AssertEquals(elements - 1, | |
| static_cast<NumItemsT>( | |
| thrust::count(output.begin(), | |
| output.end() - 1, | |
| static_cast<ValueT>(-1)))); | |
| AssertEquals(output.back(), static_cast<ValueT>(elements)); | |
| thrust::constant_iterator<ValueT> const_iter(ValueT{}); | |
| AdjacentDifferenceCopy<READ_LEFT>(const_iter, | |
| output.begin(), | |
| cub::Difference{}, | |
| elements); | |
| AssertEquals(elements, | |
| static_cast<NumItemsT>( | |
| thrust::count(output.begin(), output.end(), ValueT{}))); | |
| thrust::fill(output.begin(), output.end(), ValueT{}); | |
| AdjacentDifferenceCopy<READ_RIGHT>(const_iter, | |
| output.begin(), | |
| cub::Difference{}, | |
| elements); | |
| AssertEquals(elements, | |
| static_cast<NumItemsT>( | |
| thrust::count(output.begin(), output.end(), ValueT{}))); | |
| AdjacentDifferenceCopy<READ_LEFT>(const_iter, | |
| thrust::make_discard_iterator(), | |
| cub::Difference{}, | |
| elements); | |
| AdjacentDifferenceCopy<READ_RIGHT>(const_iter, | |
| thrust::make_discard_iterator(), | |
| cub::Difference{}, | |
| elements); | |
| } | |
| template <typename NumItemsT> | |
| void TestFancyIterators(NumItemsT elements) | |
| { | |
| TestFancyIterators<std::uint64_t, NumItemsT>(elements); | |
| } | |
| template <typename NumItemsT> | |
| void TestSize(NumItemsT elements) | |
| { | |
| Test(elements); | |
| TestCopy(elements); | |
| TestFancyIterators(elements); | |
| } | |
| struct DetectWrongDifference | |
| { | |
| bool *flag; | |
| __host__ __device__ DetectWrongDifference operator++() const | |
| { | |
| return *this; | |
| } | |
| __host__ __device__ DetectWrongDifference operator*() const | |
| { | |
| return *this; | |
| } | |
| template <typename Difference> | |
| __host__ __device__ DetectWrongDifference operator+(Difference) const | |
| { | |
| return *this; | |
| } | |
| template <typename Index> | |
| __host__ __device__ DetectWrongDifference operator[](Index) const | |
| { | |
| return *this; | |
| } | |
| __device__ void operator=(long long difference) const | |
| { | |
| if (difference != 1) | |
| { | |
| *flag = false; | |
| } | |
| } | |
| }; | |
| void TestAdjacentDifferenceWithBigIndexesHelper(int magnitude) | |
| { | |
| const std::size_t elements = 1ll << magnitude; | |
| thrust::device_vector<bool> all_differences_correct(1, true); | |
| thrust::counting_iterator<long long> in(1); | |
| DetectWrongDifference out = { | |
| thrust::raw_pointer_cast(all_differences_correct.data()) | |
| }; | |
| AdjacentDifferenceCopy<READ_LEFT>(in, out, cub::Difference{}, elements); | |
| AssertEquals(all_differences_correct.front(), true); | |
| } | |
| void TestAdjacentDifferenceWithBigIndexes() | |
| { | |
| TestAdjacentDifferenceWithBigIndexesHelper(30); | |
| TestAdjacentDifferenceWithBigIndexesHelper(31); | |
| TestAdjacentDifferenceWithBigIndexesHelper(32); | |
| TestAdjacentDifferenceWithBigIndexesHelper(33); | |
| } | |
| struct InvocationsCounter | |
| { | |
| int *m_d_counts{}; | |
| explicit InvocationsCounter(int *d_counts) : m_d_counts(d_counts) {} | |
| __device__ int operator()(int l, int /* r */) const | |
| { | |
| atomicAdd(m_d_counts + l, 1); | |
| return l; | |
| } | |
| }; | |
| void TestAdjacentDifferenceOpInvocationsNum(int num_items) | |
| { | |
| auto in = thrust::make_counting_iterator(0); | |
| auto out = thrust::make_discard_iterator(); | |
| thrust::device_vector<int> num_of_invocations(num_items, 0); | |
| InvocationsCounter op{thrust::raw_pointer_cast(num_of_invocations.data())}; | |
| AdjacentDifferenceCopy<READ_LEFT>(in, out, op, num_items); | |
| AssertEquals( | |
| num_items - 1, | |
| thrust::count(num_of_invocations.begin() + 1, num_of_invocations.end(), 1)); | |
| AssertEquals(0, num_of_invocations[0]); | |
| thrust::fill_n(num_of_invocations.begin(), num_items, 0); | |
| AdjacentDifferenceCopy<READ_RIGHT>(in, out, op, num_items); | |
| AssertEquals( | |
| num_items - 1, | |
| thrust::count(num_of_invocations.begin(), num_of_invocations.end() - 1, 1)); | |
| AssertEquals(0, num_of_invocations[num_items - 1]); | |
| } | |
| void TestAdjacentDifferenceOpInvocationsNum() | |
| { | |
| for (int num_items = 1; num_items < 4096; num_items *= 2) | |
| { | |
| TestAdjacentDifferenceOpInvocationsNum(num_items); | |
| } | |
| } | |
| int main(int argc, char** argv) | |
| { | |
| CommandLineArgs args(argc, argv); | |
| // Initialize device | |
| CubDebugExit(args.DeviceInit()); | |
| TestSize(0); | |
| for (std::size_t power_of_two = 2; power_of_two < 20; power_of_two += 2) | |
| { | |
| TestSize(1ull << power_of_two); | |
| } | |
| TestAdjacentDifferenceWithBigIndexes(); | |
| TestAdjacentDifferenceOpInvocationsNum(); | |
| return 0; | |
| } | |