/****************************************************************************** * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ // Ensure printing of CUDA runtime errors to console #define CUB_STDERR #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "test_util.h" using namespace cub; constexpr bool READ_LEFT = true; constexpr bool READ_RIGHT = false; /** * \brief Generates integer sequence \f$S_n=i(i-1)/2\f$. * * The adjacent difference of this sequence produce consecutive numbers: * \f[ * p = \frac{i(i - 1)}{2} \\ * n = \frac{(i + 1) i}{2} \\ * n - p = i \\ * \frac{(i + 1) i}{2} - \frac{i (i - 1)}{2} = i \\ * (i + 1) i - i (i - 1) = 2 i \\ * (i + 1) - (i - 1) = 2 \\ * 2 = 2 * \f] */ template struct TestSequenceGenerator { template __device__ __host__ DestT operator()(SourceT index) const { return static_cast(index * (index - 1) / SourceT(2)); } }; template struct CustomDifference { template __device__ OutputT operator()(const InputT &lhs, const InputT &rhs) { return static_cast(lhs - rhs); } }; template void AdjacentDifference(void *temp_storage, std::size_t &temp_storage_bytes, IteratorT it, DifferenceOpT difference_op, NumItemsT num_items) { const bool is_default_op_in_use = std::is_same::value; if (ReadLeft) { if (is_default_op_in_use) { CubDebugExit( cub::DeviceAdjacentDifference::SubtractLeft(temp_storage, temp_storage_bytes, it, num_items)); } else { CubDebugExit( cub::DeviceAdjacentDifference::SubtractLeft(temp_storage, temp_storage_bytes, it, num_items, difference_op)); } } else { if (is_default_op_in_use) { CubDebugExit( cub::DeviceAdjacentDifference::SubtractRight(temp_storage, temp_storage_bytes, it, num_items)); } else { CubDebugExit( cub::DeviceAdjacentDifference::SubtractRight(temp_storage, temp_storage_bytes, it, num_items, difference_op)); } } } template void AdjacentDifferenceCopy(void *temp_storage, std::size_t &temp_storage_bytes, InputIteratorT input, OutputIteratorT output, DifferenceOpT difference_op, NumItemsT num_items) { const bool is_default_op_in_use = std::is_same::value; if (ReadLeft) { if (is_default_op_in_use) { CubDebugExit( cub::DeviceAdjacentDifference::SubtractLeftCopy(temp_storage, temp_storage_bytes, input, output, num_items)); } else { CubDebugExit( cub::DeviceAdjacentDifference::SubtractLeftCopy(temp_storage, temp_storage_bytes, input, output, num_items, difference_op)); } } else { if (is_default_op_in_use) { CubDebugExit( cub::DeviceAdjacentDifference::SubtractRightCopy(temp_storage, temp_storage_bytes, input, output, num_items)); } else { CubDebugExit( cub::DeviceAdjacentDifference::SubtractRightCopy(temp_storage, temp_storage_bytes, input, output, num_items, difference_op)); } } } template void AdjacentDifference(IteratorT it, DifferenceOpT difference_op, NumItemsT num_items) { std::size_t temp_storage_bytes {}; AdjacentDifference(nullptr, temp_storage_bytes, it, difference_op, num_items); thrust::device_vector temp_storage(temp_storage_bytes); AdjacentDifference(thrust::raw_pointer_cast(temp_storage.data()), temp_storage_bytes, it, difference_op, num_items); } template void AdjacentDifferenceCopy(InputIteratorT input, OutputIteratorT output, DifferenceOpT difference_op, NumItemsT num_items) { std::size_t temp_storage_bytes{}; AdjacentDifferenceCopy(nullptr, temp_storage_bytes, input, output, difference_op, num_items); thrust::device_vector temp_storage(temp_storage_bytes); AdjacentDifferenceCopy(thrust::raw_pointer_cast( temp_storage.data()), temp_storage_bytes, input, output, difference_op, num_items); } template bool CheckResult(FirstIteratorT first_begin, FirstIteratorT first_end, SecondOperatorT second_begin) { auto err = thrust::mismatch(first_begin, first_end, second_begin); if (err.first != first_end) { return false; } return true; } template void TestCopy(NumItemsT elements, DifferenceOpT difference_op) { thrust::device_vector input(elements); thrust::tabulate(input.begin(), input.end(), TestSequenceGenerator{}); thrust::device_vector output(elements, OutputT{42}); InputT *d_input = thrust::raw_pointer_cast(input.data()); OutputT *d_output = thrust::raw_pointer_cast(output.data()); using CountingIteratorT = typename thrust::counting_iterator; AdjacentDifferenceCopy(d_input, d_output, difference_op, elements); AssertTrue(CheckResult(output.begin() + 1, output.end(), CountingIteratorT(OutputT{0}))); thrust::fill(output.begin(), output.end(), OutputT{42}); AdjacentDifferenceCopy(d_input, d_output, difference_op, elements); thrust::device_vector reference(input.size()); thrust::sequence(reference.begin(), reference.end(), static_cast(0), static_cast(-1)); AssertTrue(CheckResult(output.begin(), output.end() - 1, reference.begin())); } template void TestIteratorCopy(NumItemsT elements, DifferenceOpT difference_op) { thrust::device_vector input(elements); thrust::tabulate(input.begin(), input.end(), TestSequenceGenerator{}); thrust::device_vector output(elements, OutputT{42}); using CountingIteratorT = typename thrust::counting_iterator; AdjacentDifferenceCopy(input.cbegin(), output.begin(), difference_op, elements); AssertTrue(CheckResult(output.begin() + 1, output.end(), CountingIteratorT(OutputT{0}))); thrust::fill(output.begin(), output.end(), OutputT{42}); AdjacentDifferenceCopy(input.cbegin(), output.begin(), difference_op, elements); thrust::device_vector reference(input.size()); thrust::sequence(reference.begin(), reference.end(), static_cast(0), static_cast(-1)); AssertTrue(CheckResult(output.begin(), output.end() - 1, reference.begin())); } template void TestCopy(NumItemsT elements) { TestCopy(elements, cub::Difference{}); TestCopy(elements, CustomDifference{}); TestIteratorCopy(elements, cub::Difference{}); TestIteratorCopy(elements, CustomDifference{}); } template void TestCopy(NumItemsT elements) { TestCopy(elements); TestCopy(elements); } template void Test(NumItemsT elements, DifferenceOpT difference_op) { thrust::device_vector data(elements); thrust::tabulate(data.begin(), data.end(), TestSequenceGenerator{}); T *d_data = thrust::raw_pointer_cast(data.data()); using CountingIteratorT = typename thrust::counting_iterator; AdjacentDifference(d_data, difference_op, elements); AssertTrue(CheckResult(data.begin() + 1, data.end(), CountingIteratorT(T{0}))); thrust::tabulate(data.begin(), data.end(), TestSequenceGenerator{}); AdjacentDifference(d_data, difference_op, elements); thrust::device_vector reference(data.size()); thrust::sequence(reference.begin(), reference.end(), static_cast(0), static_cast(-1)); AssertTrue(CheckResult(data.begin(), data.end() - 1, reference.begin())); } template void TestIterators(NumItemsT elements, DifferenceOpT difference_op) { thrust::device_vector data(elements); thrust::tabulate(data.begin(), data.end(), TestSequenceGenerator{}); using CountingIteratorT = typename thrust::counting_iterator; AdjacentDifference(data.begin(), difference_op, elements); AssertTrue(CheckResult(data.begin() + 1, data.end(), CountingIteratorT(T{0}))); thrust::tabulate(data.begin(), data.end(), TestSequenceGenerator{}); AdjacentDifference(data.begin(), difference_op, elements); thrust::device_vector reference(data.size()); thrust::sequence(reference.begin(), reference.end(), static_cast(0), static_cast(-1)); AssertTrue(CheckResult(data.begin(), data.end() - 1, reference.begin())); } template void Test(NumItemsT elements) { Test(elements, cub::Difference{}); Test(elements, CustomDifference{}); TestIterators(elements, cub::Difference{}); TestIterators(elements, CustomDifference{}); } template void Test(NumItemsT elements) { Test(elements); Test(elements); Test(elements); } template void TestFancyIterators(NumItemsT elements) { if (elements == 0) { return; } thrust::counting_iterator count_iter(ValueT{1}); thrust::device_vector output(elements, ValueT{42}); AdjacentDifferenceCopy(count_iter, output.begin(), cub::Difference{}, elements); AssertEquals(elements, static_cast( thrust::count(output.begin(), output.end(), ValueT(1)))); thrust::fill(output.begin(), output.end(), ValueT{}); AdjacentDifferenceCopy(count_iter, output.begin(), cub::Difference{}, elements); AssertEquals(elements - 1, static_cast( thrust::count(output.begin(), output.end() - 1, static_cast(-1)))); AssertEquals(output.back(), static_cast(elements)); thrust::constant_iterator const_iter(ValueT{}); AdjacentDifferenceCopy(const_iter, output.begin(), cub::Difference{}, elements); AssertEquals(elements, static_cast( thrust::count(output.begin(), output.end(), ValueT{}))); thrust::fill(output.begin(), output.end(), ValueT{}); AdjacentDifferenceCopy(const_iter, output.begin(), cub::Difference{}, elements); AssertEquals(elements, static_cast( thrust::count(output.begin(), output.end(), ValueT{}))); AdjacentDifferenceCopy(const_iter, thrust::make_discard_iterator(), cub::Difference{}, elements); AdjacentDifferenceCopy(const_iter, thrust::make_discard_iterator(), cub::Difference{}, elements); } template void TestFancyIterators(NumItemsT elements) { TestFancyIterators(elements); } template void TestSize(NumItemsT elements) { Test(elements); TestCopy(elements); TestFancyIterators(elements); } struct DetectWrongDifference { bool *flag; __host__ __device__ DetectWrongDifference operator++() const { return *this; } __host__ __device__ DetectWrongDifference operator*() const { return *this; } template __host__ __device__ DetectWrongDifference operator+(Difference) const { return *this; } template __host__ __device__ DetectWrongDifference operator[](Index) const { return *this; } __device__ void operator=(long long difference) const { if (difference != 1) { *flag = false; } } }; void TestAdjacentDifferenceWithBigIndexesHelper(int magnitude) { const std::size_t elements = 1ll << magnitude; thrust::device_vector all_differences_correct(1, true); thrust::counting_iterator in(1); DetectWrongDifference out = { thrust::raw_pointer_cast(all_differences_correct.data()) }; AdjacentDifferenceCopy(in, out, cub::Difference{}, elements); AssertEquals(all_differences_correct.front(), true); } void TestAdjacentDifferenceWithBigIndexes() { TestAdjacentDifferenceWithBigIndexesHelper(30); TestAdjacentDifferenceWithBigIndexesHelper(31); TestAdjacentDifferenceWithBigIndexesHelper(32); TestAdjacentDifferenceWithBigIndexesHelper(33); } struct InvocationsCounter { int *m_d_counts{}; explicit InvocationsCounter(int *d_counts) : m_d_counts(d_counts) {} __device__ int operator()(int l, int /* r */) const { atomicAdd(m_d_counts + l, 1); return l; } }; void TestAdjacentDifferenceOpInvocationsNum(int num_items) { auto in = thrust::make_counting_iterator(0); auto out = thrust::make_discard_iterator(); thrust::device_vector num_of_invocations(num_items, 0); InvocationsCounter op{thrust::raw_pointer_cast(num_of_invocations.data())}; AdjacentDifferenceCopy(in, out, op, num_items); AssertEquals( num_items - 1, thrust::count(num_of_invocations.begin() + 1, num_of_invocations.end(), 1)); AssertEquals(0, num_of_invocations[0]); thrust::fill_n(num_of_invocations.begin(), num_items, 0); AdjacentDifferenceCopy(in, out, op, num_items); AssertEquals( num_items - 1, thrust::count(num_of_invocations.begin(), num_of_invocations.end() - 1, 1)); AssertEquals(0, num_of_invocations[num_items - 1]); } void TestAdjacentDifferenceOpInvocationsNum() { for (int num_items = 1; num_items < 4096; num_items *= 2) { TestAdjacentDifferenceOpInvocationsNum(num_items); } } int main(int argc, char** argv) { CommandLineArgs args(argc, argv); // Initialize device CubDebugExit(args.DeviceInit()); TestSize(0); for (std::size_t power_of_two = 2; power_of_two < 20; power_of_two += 2) { TestSize(1ull << power_of_two); } TestAdjacentDifferenceWithBigIndexes(); TestAdjacentDifferenceOpInvocationsNum(); return 0; }