Spaces:
Runtime error
Runtime error
| /****************************************************************************** | |
| * Copyright (c) 2011, Duane Merrill. All rights reserved. | |
| * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions are met: | |
| * * Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * * Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in the | |
| * documentation and/or other materials provided with the distribution. | |
| * * Neither the name of the NVIDIA CORPORATION nor the | |
| * names of its contributors may be used to endorse or promote products | |
| * derived from this software without specific prior written permission. | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY | |
| * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * | |
| ******************************************************************************/ | |
| /****************************************************************************** | |
| * Test of iterator utilities | |
| ******************************************************************************/ | |
| // Ensure printing of CUDA runtime errors to console | |
| #define CUB_STDERR | |
| #include <iterator> | |
| #include <stdio.h> | |
| #include <typeinfo> | |
| #include <cub/iterator/arg_index_input_iterator.cuh> | |
| #include <cub/iterator/cache_modified_input_iterator.cuh> | |
| #include <cub/iterator/cache_modified_output_iterator.cuh> | |
| #include <cub/iterator/constant_input_iterator.cuh> | |
| #include <cub/iterator/counting_input_iterator.cuh> | |
| #include <cub/iterator/tex_obj_input_iterator.cuh> | |
| #include <cub/iterator/tex_ref_input_iterator.cuh> | |
| #include <cub/iterator/transform_input_iterator.cuh> | |
| #include <cub/util_type.cuh> | |
| #include <cub/util_allocator.cuh> | |
| #include "test_util.h" | |
| #include <thrust/device_ptr.h> | |
| #include <thrust/copy.h> | |
| using namespace cub; | |
| //--------------------------------------------------------------------- | |
| // Globals, constants and typedefs | |
| //--------------------------------------------------------------------- | |
| bool g_verbose = false; | |
| CachingDeviceAllocator g_allocator(true); | |
| // Dispatch types | |
| enum Backend | |
| { | |
| CUB, // CUB method | |
| THRUST, // Thrust method | |
| CDP, // GPU-based (dynamic parallelism) dispatch to CUB method | |
| }; | |
| template <typename T> | |
| struct TransformOp | |
| { | |
| // Increment transform | |
| __host__ __device__ __forceinline__ T operator()(T input) const | |
| { | |
| T addend; | |
| InitValue(INTEGER_SEED, addend, 1); | |
| return input + addend; | |
| } | |
| }; | |
| struct SelectOp | |
| { | |
| template <typename T> | |
| __host__ __device__ __forceinline__ bool operator()(T input) | |
| { | |
| return true; | |
| } | |
| }; | |
| //--------------------------------------------------------------------- | |
| // Test kernels | |
| //--------------------------------------------------------------------- | |
| /** | |
| * Test random access input iterator | |
| */ | |
| template < | |
| typename InputIteratorT, | |
| typename T> | |
| __global__ void Kernel( | |
| InputIteratorT d_in, | |
| T *d_out, | |
| InputIteratorT *d_itrs) | |
| { | |
| d_out[0] = *d_in; // Value at offset 0 | |
| d_out[1] = d_in[100]; // Value at offset 100 | |
| d_out[2] = *(d_in + 1000); // Value at offset 1000 | |
| d_out[3] = *(d_in + 10000); // Value at offset 10000 | |
| d_in++; | |
| d_out[4] = d_in[0]; // Value at offset 1 | |
| d_in += 20; | |
| d_out[5] = d_in[0]; // Value at offset 21 | |
| d_itrs[0] = d_in; // Iterator at offset 21 | |
| d_in -= 10; | |
| d_out[6] = d_in[0]; // Value at offset 11; | |
| d_in -= 11; | |
| d_out[7] = d_in[0]; // Value at offset 0 | |
| d_itrs[1] = d_in; // Iterator at offset 0 | |
| } | |
| //--------------------------------------------------------------------- | |
| // Host testing subroutines | |
| //--------------------------------------------------------------------- | |
| /** | |
| * Run iterator test on device | |
| */ | |
| template < | |
| typename InputIteratorT, | |
| typename T, | |
| int TEST_VALUES> | |
| void Test( | |
| InputIteratorT d_in, | |
| T (&h_reference)[TEST_VALUES]) | |
| { | |
| // Allocate device arrays | |
| T *d_out = NULL; | |
| InputIteratorT *d_itrs = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * TEST_VALUES)); | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_itrs, sizeof(InputIteratorT) * 2)); | |
| int compare; | |
| // Run unguarded kernel | |
| Kernel<<<1, 1>>>(d_in, d_out, d_itrs); | |
| CubDebugExit(cudaPeekAtLastError()); | |
| CubDebugExit(cudaDeviceSynchronize()); | |
| // Check results | |
| compare = CompareDeviceResults(h_reference, d_out, TEST_VALUES, g_verbose, g_verbose); | |
| printf("\tValues: %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| // Check iterator at offset 21 | |
| InputIteratorT h_itr = d_in + 21; | |
| compare = CompareDeviceResults(&h_itr, d_itrs, 1, g_verbose, g_verbose); | |
| printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| // Check iterator at offset 0 | |
| compare = CompareDeviceResults(&d_in, d_itrs + 1, 1, g_verbose, g_verbose); | |
| printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| // Cleanup | |
| if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); | |
| if (d_itrs) CubDebugExit(g_allocator.DeviceFree(d_itrs)); | |
| } | |
| /** | |
| * Test constant iterator | |
| */ | |
| template <typename T> | |
| void TestConstant(T base) | |
| { | |
| printf("\nTesting constant iterator on type %s (base: %lld)\n", typeid(T).name(), (unsigned long long) (base)); fflush(stdout); | |
| // | |
| // Test iterator manipulation in kernel | |
| // | |
| T h_reference[8] = {base, base, base, base, base, base, base, base}; | |
| ConstantInputIterator<T> d_itr(base); | |
| Test(d_itr, h_reference); | |
| #if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer | |
| // | |
| // Test with thrust::copy_if() | |
| // | |
| int copy_items = 100; | |
| T *h_copy = new T[copy_items]; | |
| T *d_copy = NULL; | |
| for (int i = 0; i < copy_items; ++i) | |
| h_copy[i] = d_itr[i]; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * copy_items)); | |
| thrust::device_ptr<T> d_copy_wrapper(d_copy); | |
| thrust::copy_if(d_itr, d_itr + copy_items, d_copy_wrapper, SelectOp()); | |
| int compare = CompareDeviceResults(h_copy, d_copy, copy_items, g_verbose, g_verbose); | |
| printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| if (h_copy) delete[] h_copy; | |
| if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); | |
| #endif // THRUST_VERSION | |
| } | |
| /** | |
| * Test counting iterator | |
| */ | |
| template <typename T> | |
| void TestCounting(T base) | |
| { | |
| printf("\nTesting counting iterator on type %s (base: %d) \n", typeid(T).name(), int(base)); fflush(stdout); | |
| // | |
| // Test iterator manipulation in kernel | |
| // | |
| // Initialize reference data | |
| T h_reference[8]; | |
| h_reference[0] = base + 0; // Value at offset 0 | |
| h_reference[1] = base + 100; // Value at offset 100 | |
| h_reference[2] = base + 1000; // Value at offset 1000 | |
| h_reference[3] = base + 10000; // Value at offset 10000 | |
| h_reference[4] = base + 1; // Value at offset 1 | |
| h_reference[5] = base + 21; // Value at offset 21 | |
| h_reference[6] = base + 11; // Value at offset 11 | |
| h_reference[7] = base + 0; // Value at offset 0; | |
| CountingInputIterator<T> d_itr(base); | |
| Test(d_itr, h_reference); | |
| #if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer | |
| // | |
| // Test with thrust::copy_if() | |
| // | |
| unsigned long long max_items = ((1ull << ((sizeof(T) * 8) - 1)) - 1); | |
| size_t copy_items = (size_t) CUB_MIN(max_items - base, 100); // potential issue with differencing overflows when T is a smaller type than can handle the offset | |
| T *h_copy = new T[copy_items]; | |
| T *d_copy = NULL; | |
| for (unsigned long long i = 0; i < copy_items; ++i) | |
| h_copy[i] = d_itr[i]; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * copy_items)); | |
| thrust::device_ptr<T> d_copy_wrapper(d_copy); | |
| thrust::copy_if(d_itr, d_itr + copy_items, d_copy_wrapper, SelectOp()); | |
| int compare = CompareDeviceResults(h_copy, d_copy, copy_items, g_verbose, g_verbose); | |
| printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| if (h_copy) delete[] h_copy; | |
| if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); | |
| #endif // THRUST_VERSION | |
| } | |
| /** | |
| * Test modified iterator | |
| */ | |
| template <typename T, typename CastT> | |
| void TestModified() | |
| { | |
| printf("\nTesting cache-modified iterator on type %s\n", typeid(T).name()); fflush(stdout); | |
| // | |
| // Test iterator manipulation in kernel | |
| // | |
| constexpr int TEST_VALUES = 11000; | |
| T *h_data = new T[TEST_VALUES]; | |
| for (int i = 0; i < TEST_VALUES; ++i) | |
| { | |
| RandomBits(h_data[i]); | |
| } | |
| // Allocate device arrays | |
| T *d_data = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); | |
| CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); | |
| // Initialize reference data | |
| T h_reference[8]; | |
| h_reference[0] = h_data[0]; // Value at offset 0 | |
| h_reference[1] = h_data[100]; // Value at offset 100 | |
| h_reference[2] = h_data[1000]; // Value at offset 1000 | |
| h_reference[3] = h_data[10000]; // Value at offset 10000 | |
| h_reference[4] = h_data[1]; // Value at offset 1 | |
| h_reference[5] = h_data[21]; // Value at offset 21 | |
| h_reference[6] = h_data[11]; // Value at offset 11 | |
| h_reference[7] = h_data[0]; // Value at offset 0; | |
| Test(CacheModifiedInputIterator<LOAD_DEFAULT, T>((CastT*) d_data), h_reference); | |
| Test(CacheModifiedInputIterator<LOAD_CA, T>((CastT*) d_data), h_reference); | |
| Test(CacheModifiedInputIterator<LOAD_CG, T>((CastT*) d_data), h_reference); | |
| Test(CacheModifiedInputIterator<LOAD_CS, T>((CastT*) d_data), h_reference); | |
| Test(CacheModifiedInputIterator<LOAD_CV, T>((CastT*) d_data), h_reference); | |
| Test(CacheModifiedInputIterator<LOAD_LDG, T>((CastT*) d_data), h_reference); | |
| Test(CacheModifiedInputIterator<LOAD_VOLATILE, T>((CastT*) d_data), h_reference); | |
| #if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer | |
| // | |
| // Test with thrust::copy_if() | |
| // | |
| T *d_copy = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); | |
| CacheModifiedInputIterator<LOAD_CG, T> d_in_itr((CastT*) d_data); | |
| CacheModifiedOutputIterator<STORE_CG, T> d_out_itr((CastT*) d_copy); | |
| thrust::copy_if(d_in_itr, d_in_itr + TEST_VALUES, d_out_itr, SelectOp()); | |
| int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose); | |
| printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| // Cleanup | |
| if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); | |
| #endif // THRUST_VERSION | |
| if (h_data) delete[] h_data; | |
| if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); | |
| } | |
| /** | |
| * Test transform iterator | |
| */ | |
| template <typename T, typename CastT> | |
| void TestTransform() | |
| { | |
| printf("\nTesting transform iterator on type %s\n", typeid(T).name()); fflush(stdout); | |
| // | |
| // Test iterator manipulation in kernel | |
| // | |
| constexpr int TEST_VALUES = 11000; | |
| T *h_data = new T[TEST_VALUES]; | |
| for (int i = 0; i < TEST_VALUES; ++i) | |
| { | |
| InitValue(INTEGER_SEED, h_data[i], i); | |
| } | |
| // Allocate device arrays | |
| T *d_data = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); | |
| CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); | |
| TransformOp<T> op; | |
| // Initialize reference data | |
| T h_reference[8]; | |
| h_reference[0] = op(h_data[0]); // Value at offset 0 | |
| h_reference[1] = op(h_data[100]); // Value at offset 100 | |
| h_reference[2] = op(h_data[1000]); // Value at offset 1000 | |
| h_reference[3] = op(h_data[10000]); // Value at offset 10000 | |
| h_reference[4] = op(h_data[1]); // Value at offset 1 | |
| h_reference[5] = op(h_data[21]); // Value at offset 21 | |
| h_reference[6] = op(h_data[11]); // Value at offset 11 | |
| h_reference[7] = op(h_data[0]); // Value at offset 0; | |
| TransformInputIterator<T, TransformOp<T>, CastT*> d_itr((CastT*) d_data, op); | |
| Test(d_itr, h_reference); | |
| #if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer | |
| // | |
| // Test with thrust::copy_if() | |
| // | |
| T *h_copy = new T[TEST_VALUES]; | |
| for (int i = 0; i < TEST_VALUES; ++i) | |
| h_copy[i] = op(h_data[i]); | |
| T *d_copy = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); | |
| thrust::device_ptr<T> d_copy_wrapper(d_copy); | |
| thrust::copy_if(d_itr, d_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); | |
| int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose); | |
| printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| // Cleanup | |
| if (h_copy) delete[] h_copy; | |
| if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); | |
| #endif // THRUST_VERSION | |
| if (h_data) delete[] h_data; | |
| if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); | |
| } | |
| /** | |
| * Test tex-obj texture iterator | |
| */ | |
| template <typename T, typename CastT> | |
| void TestTexObj() | |
| { | |
| printf("\nTesting tex-obj iterator on type %s\n", typeid(T).name()); fflush(stdout); | |
| // | |
| // Test iterator manipulation in kernel | |
| // | |
| const unsigned int TEST_VALUES = 11000; | |
| const unsigned int DUMMY_OFFSET = 500; | |
| const unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; | |
| T *h_data = new T[TEST_VALUES]; | |
| for (int i = 0; i < TEST_VALUES; ++i) | |
| { | |
| RandomBits(h_data[i]); | |
| } | |
| // Allocate device arrays | |
| T *d_data = NULL; | |
| T *d_dummy = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); | |
| CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); | |
| CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice)); | |
| // Initialize reference data | |
| T h_reference[8]; | |
| h_reference[0] = h_data[0]; // Value at offset 0 | |
| h_reference[1] = h_data[100]; // Value at offset 100 | |
| h_reference[2] = h_data[1000]; // Value at offset 1000 | |
| h_reference[3] = h_data[10000]; // Value at offset 10000 | |
| h_reference[4] = h_data[1]; // Value at offset 1 | |
| h_reference[5] = h_data[21]; // Value at offset 21 | |
| h_reference[6] = h_data[11]; // Value at offset 11 | |
| h_reference[7] = h_data[0]; // Value at offset 0; | |
| // Create and bind obj-based test iterator | |
| TexObjInputIterator<T> d_obj_itr; | |
| CubDebugExit(d_obj_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); | |
| Test(d_obj_itr, h_reference); | |
| #if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer | |
| // | |
| // Test with thrust::copy_if() | |
| // | |
| T *d_copy = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); | |
| thrust::device_ptr<T> d_copy_wrapper(d_copy); | |
| CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES)); | |
| thrust::copy_if(d_obj_itr, d_obj_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); | |
| int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose); | |
| printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| // Cleanup | |
| CubDebugExit(d_obj_itr.UnbindTexture()); | |
| if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); | |
| #endif // THRUST_VERSION | |
| if (h_data) delete[] h_data; | |
| if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); | |
| if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy)); | |
| } | |
| #if CUDART_VERSION >= 5050 | |
| /** | |
| * Test tex-ref texture iterator | |
| */ | |
| template <typename T, typename CastT> | |
| void TestTexRef() | |
| { | |
| printf("\nTesting tex-ref iterator on type %s\n", typeid(T).name()); fflush(stdout); | |
| // | |
| // Test iterator manipulation in kernel | |
| // | |
| constexpr int TEST_VALUES = 11000; | |
| constexpr unsigned int DUMMY_OFFSET = 500; | |
| constexpr unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; | |
| T *h_data = new T[TEST_VALUES]; | |
| for (int i = 0; i < TEST_VALUES; ++i) | |
| { | |
| RandomBits(h_data[i]); | |
| } | |
| // Allocate device arrays | |
| T *d_data = NULL; | |
| T *d_dummy = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); | |
| CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); | |
| CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice)); | |
| // Initialize reference data | |
| T h_reference[8]; | |
| h_reference[0] = h_data[0]; // Value at offset 0 | |
| h_reference[1] = h_data[100]; // Value at offset 100 | |
| h_reference[2] = h_data[1000]; // Value at offset 1000 | |
| h_reference[3] = h_data[10000]; // Value at offset 10000 | |
| h_reference[4] = h_data[1]; // Value at offset 1 | |
| h_reference[5] = h_data[21]; // Value at offset 21 | |
| h_reference[6] = h_data[11]; // Value at offset 11 | |
| h_reference[7] = h_data[0]; // Value at offset 0; | |
| // Create and bind ref-based test iterator | |
| TexRefInputIterator<T, __LINE__> d_ref_itr; | |
| CubDebugExit(d_ref_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); | |
| // Create and bind dummy iterator of same type to check with interferance | |
| TexRefInputIterator<T, __LINE__> d_ref_itr2; | |
| CubDebugExit(d_ref_itr2.BindTexture((CastT*) d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); | |
| Test(d_ref_itr, h_reference); | |
| #if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer | |
| // | |
| // Test with thrust::copy_if() | |
| // | |
| T *d_copy = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); | |
| thrust::device_ptr<T> d_copy_wrapper(d_copy); | |
| CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES)); | |
| thrust::copy_if(d_ref_itr, d_ref_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); | |
| int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose); | |
| printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); | |
| #endif // THRUST_VERSION | |
| CubDebugExit(d_ref_itr.UnbindTexture()); | |
| CubDebugExit(d_ref_itr2.UnbindTexture()); | |
| if (h_data) delete[] h_data; | |
| if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); | |
| if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy)); | |
| } | |
| /** | |
| * Test texture transform iterator | |
| */ | |
| template <typename T, typename CastT> | |
| void TestTexTransform() | |
| { | |
| printf("\nTesting tex-transform iterator on type %s\n", typeid(T).name()); fflush(stdout); | |
| // | |
| // Test iterator manipulation in kernel | |
| // | |
| constexpr int TEST_VALUES = 11000; | |
| T *h_data = new T[TEST_VALUES]; | |
| for (int i = 0; i < TEST_VALUES; ++i) | |
| { | |
| InitValue(INTEGER_SEED, h_data[i], i); | |
| } | |
| // Allocate device arrays | |
| T *d_data = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); | |
| CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); | |
| TransformOp<T> op; | |
| // Initialize reference data | |
| T h_reference[8]; | |
| h_reference[0] = op(h_data[0]); // Value at offset 0 | |
| h_reference[1] = op(h_data[100]); // Value at offset 100 | |
| h_reference[2] = op(h_data[1000]); // Value at offset 1000 | |
| h_reference[3] = op(h_data[10000]); // Value at offset 10000 | |
| h_reference[4] = op(h_data[1]); // Value at offset 1 | |
| h_reference[5] = op(h_data[21]); // Value at offset 21 | |
| h_reference[6] = op(h_data[11]); // Value at offset 11 | |
| h_reference[7] = op(h_data[0]); // Value at offset 0; | |
| // Create and bind texture iterator | |
| typedef TexRefInputIterator<T, __LINE__> TextureIterator; | |
| TextureIterator d_tex_itr; | |
| CubDebugExit(d_tex_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); | |
| // Create transform iterator | |
| TransformInputIterator<T, TransformOp<T>, TextureIterator> xform_itr(d_tex_itr, op); | |
| Test(xform_itr, h_reference); | |
| #if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer | |
| // | |
| // Test with thrust::copy_if() | |
| // | |
| T *h_copy = new T[TEST_VALUES]; | |
| for (int i = 0; i < TEST_VALUES; ++i) | |
| h_copy[i] = op(h_data[i]); | |
| T *d_copy = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); | |
| thrust::device_ptr<T> d_copy_wrapper(d_copy); | |
| thrust::copy_if(xform_itr, xform_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); | |
| int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose); | |
| printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); | |
| AssertEquals(0, compare); | |
| // Cleanup | |
| if (h_copy) delete[] h_copy; | |
| if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); | |
| #endif // THRUST_VERSION | |
| CubDebugExit(d_tex_itr.UnbindTexture()); | |
| if (h_data) delete[] h_data; | |
| if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); | |
| } | |
| #endif // CUDART_VERSION | |
| /** | |
| * Run non-integer tests | |
| */ | |
| template <typename T, typename CastT> | |
| void Test(Int2Type<false> /* is_integer */) | |
| { | |
| TestModified<T, CastT>(); | |
| TestTransform<T, CastT>(); | |
| #if CUB_CDP | |
| // Test tex-obj iterators if CUDA dynamic parallelism enabled | |
| TestTexObj<T, CastT>(type_string); | |
| #endif // CUB_CDP | |
| #if CUDART_VERSION >= 5050 | |
| // Test tex-ref iterators for CUDA 5.5 | |
| TestTexRef<T, CastT>(); | |
| TestTexTransform<T, CastT>(); | |
| #endif // CUDART_VERSION | |
| } | |
| /** | |
| * Run integer tests | |
| */ | |
| template <typename T, typename CastT> | |
| void Test(Int2Type<true> /* is_integer */) | |
| { | |
| TestConstant<T>(0); | |
| TestConstant<T>(99); | |
| TestCounting<T>(0); | |
| TestCounting<T>(99); | |
| // Run non-integer tests | |
| Test<T, CastT>(Int2Type<false>()); | |
| } | |
| /** | |
| * Run tests | |
| */ | |
| template <typename T> | |
| void Test() | |
| { | |
| enum { | |
| IS_INTEGER = (Traits<T>::CATEGORY == SIGNED_INTEGER) || (Traits<T>::CATEGORY == UNSIGNED_INTEGER) | |
| }; | |
| // Test non-const type | |
| Test<T, T>(Int2Type<IS_INTEGER>()); | |
| // Test non-const type | |
| Test<T, const T>(Int2Type<IS_INTEGER>()); | |
| } | |
| /** | |
| * Main | |
| */ | |
| int main(int argc, char** argv) | |
| { | |
| // Initialize command line | |
| CommandLineArgs args(argc, argv); | |
| g_verbose = args.CheckCmdLineFlag("v"); | |
| // Print usage | |
| if (args.CheckCmdLineFlag("help")) | |
| { | |
| printf("%s " | |
| "[--device=<device-id>] " | |
| "[--v] " | |
| "\n", argv[0]); | |
| exit(0); | |
| } | |
| // Initialize device | |
| CubDebugExit(args.DeviceInit()); | |
| // Get ptx version | |
| int ptx_version = 0; | |
| CubDebugExit(PtxVersion(ptx_version)); | |
| // Evaluate different data types | |
| Test<char>(); | |
| Test<short>(); | |
| Test<int>(); | |
| Test<long>(); | |
| Test<long long>(); | |
| Test<float>(); | |
| if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted | |
| Test<double>(); | |
| Test<char2>(); | |
| Test<short2>(); | |
| Test<int2>(); | |
| Test<long2>(); | |
| Test<longlong2>(); | |
| Test<float2>(); | |
| if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted | |
| Test<double2>(); | |
| Test<char3>(); | |
| Test<short3>(); | |
| Test<int3>(); | |
| Test<long3>(); | |
| Test<longlong3>(); | |
| Test<float3>(); | |
| if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted | |
| Test<double3>(); | |
| Test<char4>(); | |
| Test<short4>(); | |
| Test<int4>(); | |
| Test<long4>(); | |
| Test<longlong4>(); | |
| Test<float4>(); | |
| if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted | |
| Test<double4>(); | |
| Test<TestFoo>(); | |
| Test<TestBar>(); | |
| printf("\nTest complete\n"); fflush(stdout); | |
| return 0; | |
| } | |