| | #include <unittest/unittest.h> |
| | #include <thrust/for_each.h> |
| | #include <thrust/execution_policy.h> |
| | #include <algorithm> |
| |
|
| | static const size_t NUM_REGISTERS = 64; |
| |
|
| | template <size_t N> __host__ __device__ void f (int * x) { int temp = *x; f<N - 1>(x + 1); *x = temp;}; |
| | template <> __host__ __device__ void f<0>(int * ) { } |
| | template <size_t N> |
| | struct CopyFunctorWithManyRegisters |
| | { |
| | __host__ __device__ |
| | void operator()(int * ptr) |
| | { |
| | f<N>(ptr); |
| | } |
| | }; |
| |
|
| |
|
| | void TestForEachLargeRegisterFootprint() |
| | { |
| | int current_device = -1; |
| | cudaGetDevice(¤t_device); |
| | cudaDeviceProp prop; |
| | cudaGetDeviceProperties(&prop, current_device); |
| |
|
| | thrust::device_vector<int> data(NUM_REGISTERS, 12345); |
| |
|
| | thrust::device_vector<int *> input(1, thrust::raw_pointer_cast(&data[0])); |
| | |
| | thrust::for_each(input.begin(), input.end(), CopyFunctorWithManyRegisters<NUM_REGISTERS>()); |
| | } |
| | DECLARE_UNITTEST(TestForEachLargeRegisterFootprint); |
| |
|
| |
|
| | void TestForEachNLargeRegisterFootprint() |
| | { |
| | int current_device = -1; |
| | cudaGetDevice(¤t_device); |
| | cudaDeviceProp prop; |
| | cudaGetDeviceProperties(&prop, current_device); |
| |
|
| | thrust::device_vector<int> data(NUM_REGISTERS, 12345); |
| |
|
| | thrust::device_vector<int *> input(1, thrust::raw_pointer_cast(&data[0])); |
| | |
| | thrust::for_each_n(input.begin(), input.size(), CopyFunctorWithManyRegisters<NUM_REGISTERS>()); |
| | } |
| | DECLARE_UNITTEST(TestForEachNLargeRegisterFootprint); |
| |
|
| |
|
| | template <typename T> |
| | struct mark_present_for_each |
| | { |
| | T * ptr; |
| | __host__ __device__ void |
| | operator()(T x){ ptr[(int) x] = 1; } |
| | }; |
| |
|
| |
|
| | template<typename ExecutionPolicy, typename Iterator, typename Function> |
| | __global__ void for_each_kernel(ExecutionPolicy exec, Iterator first, Iterator last, Function f) |
| | { |
| | thrust::for_each(exec, first, last, f); |
| | } |
| |
|
| |
|
| | template<typename T> |
| | void TestForEachDeviceSeq(const size_t n) |
| | { |
| | const size_t output_size = std::min((size_t) 10, 2 * n); |
| | |
| | thrust::host_vector<T> h_input = unittest::random_integers<T>(n); |
| | |
| | for(size_t i = 0; i < n; i++) |
| | h_input[i] = ((size_t) h_input[i]) % output_size; |
| | |
| | thrust::device_vector<T> d_input = h_input; |
| | |
| | thrust::host_vector<T> h_output(output_size, (T) 0); |
| | thrust::device_vector<T> d_output(output_size, (T) 0); |
| | |
| | mark_present_for_each<T> h_f; |
| | mark_present_for_each<T> d_f; |
| | h_f.ptr = &h_output[0]; |
| | d_f.ptr = (&d_output[0]).get(); |
| | |
| | thrust::for_each(h_input.begin(), h_input.end(), h_f); |
| | |
| | for_each_kernel<<<1,1>>>(thrust::seq, d_input.begin(), d_input.end(), d_f); |
| | cudaError_t const err = cudaDeviceSynchronize(); |
| | ASSERT_EQUAL(cudaSuccess, err); |
| | |
| | ASSERT_EQUAL(h_output, d_output); |
| | } |
| | DECLARE_VARIABLE_UNITTEST(TestForEachDeviceSeq); |
| |
|
| |
|
| | template<typename T> |
| | void TestForEachDeviceDevice(const size_t n) |
| | { |
| | const size_t output_size = std::min((size_t) 10, 2 * n); |
| | |
| | thrust::host_vector<T> h_input = unittest::random_integers<T>(n); |
| | |
| | for(size_t i = 0; i < n; i++) |
| | h_input[i] = ((size_t) h_input[i]) % output_size; |
| | |
| | thrust::device_vector<T> d_input = h_input; |
| | |
| | thrust::host_vector<T> h_output(output_size, (T) 0); |
| | thrust::device_vector<T> d_output(output_size, (T) 0); |
| | |
| | mark_present_for_each<T> h_f; |
| | mark_present_for_each<T> d_f; |
| | h_f.ptr = &h_output[0]; |
| | d_f.ptr = (&d_output[0]).get(); |
| | |
| | thrust::for_each(h_input.begin(), h_input.end(), h_f); |
| | |
| | for_each_kernel<<<1,1>>>(thrust::device, d_input.begin(), d_input.end(), d_f); |
| | { |
| | cudaError_t const err = cudaGetLastError(); |
| | ASSERT_EQUAL(cudaSuccess, err); |
| | } |
| | { |
| | cudaError_t const err = cudaDeviceSynchronize(); |
| | ASSERT_EQUAL(cudaSuccess, err); |
| | } |
| | |
| | ASSERT_EQUAL(h_output, d_output); |
| | } |
| | DECLARE_VARIABLE_UNITTEST(TestForEachDeviceDevice); |
| |
|
| |
|
| | template<typename ExecutionPolicy, typename Iterator, typename Size, typename Function> |
| | __global__ |
| | void for_each_n_kernel(ExecutionPolicy exec, Iterator first, Size n, Function f) |
| | { |
| | thrust::for_each_n(exec, first, n, f); |
| | } |
| |
|
| |
|
| | template<typename T> |
| | void TestForEachNDeviceSeq(const size_t n) |
| | { |
| | const size_t output_size = std::min((size_t) 10, 2 * n); |
| | |
| | thrust::host_vector<T> h_input = unittest::random_integers<T>(n); |
| | |
| | for(size_t i = 0; i < n; i++) |
| | h_input[i] = static_cast<T>(((size_t) h_input[i]) % output_size); |
| | |
| | thrust::device_vector<T> d_input = h_input; |
| | |
| | thrust::host_vector<T> h_output(output_size, (T) 0); |
| | thrust::device_vector<T> d_output(output_size, (T) 0); |
| | |
| | mark_present_for_each<T> h_f; |
| | mark_present_for_each<T> d_f; |
| | h_f.ptr = &h_output[0]; |
| | d_f.ptr = (&d_output[0]).get(); |
| | |
| | thrust::for_each_n(h_input.begin(), h_input.size(), h_f); |
| | |
| | for_each_n_kernel<<<1,1>>>(thrust::seq, d_input.begin(), d_input.size(), d_f); |
| | cudaError_t const err = cudaDeviceSynchronize(); |
| | ASSERT_EQUAL(cudaSuccess, err); |
| | |
| | ASSERT_EQUAL(h_output, d_output); |
| | } |
| | DECLARE_VARIABLE_UNITTEST(TestForEachNDeviceSeq); |
| |
|
| |
|
| | template<typename T> |
| | void TestForEachNDeviceDevice(const size_t n) |
| | { |
| | const size_t output_size = std::min((size_t) 10, 2 * n); |
| | |
| | thrust::host_vector<T> h_input = unittest::random_integers<T>(n); |
| | |
| | for(size_t i = 0; i < n; i++) |
| | h_input[i] = static_cast<T>(((size_t) h_input[i]) % output_size); |
| | |
| | thrust::device_vector<T> d_input = h_input; |
| | |
| | thrust::host_vector<T> h_output(output_size, (T) 0); |
| | thrust::device_vector<T> d_output(output_size, (T) 0); |
| | |
| | mark_present_for_each<T> h_f; |
| | mark_present_for_each<T> d_f; |
| | h_f.ptr = &h_output[0]; |
| | d_f.ptr = (&d_output[0]).get(); |
| | |
| | thrust::for_each_n(h_input.begin(), h_input.size(), h_f); |
| | |
| | for_each_n_kernel<<<1,1>>>(thrust::device, d_input.begin(), d_input.size(), d_f); |
| | cudaError_t const err = cudaDeviceSynchronize(); |
| | ASSERT_EQUAL(cudaSuccess, err); |
| | |
| | ASSERT_EQUAL(h_output, d_output); |
| | } |
| | DECLARE_VARIABLE_UNITTEST(TestForEachNDeviceDevice); |
| |
|
| |
|
| | void TestForEachCudaStreams() |
| | { |
| | cudaStream_t s; |
| | cudaStreamCreate(&s); |
| | |
| | thrust::device_vector<int> input(5); |
| | thrust::device_vector<int> output(7, 0); |
| | |
| | input[0] = 3; input[1] = 2; input[2] = 3; input[3] = 4; input[4] = 6; |
| | |
| | mark_present_for_each<int> f; |
| | f.ptr = thrust::raw_pointer_cast(output.data()); |
| | |
| | thrust::for_each(thrust::cuda::par.on(s), input.begin(), input.end(), f); |
| |
|
| | cudaStreamSynchronize(s); |
| | |
| | ASSERT_EQUAL(output[0], 0); |
| | ASSERT_EQUAL(output[1], 0); |
| | ASSERT_EQUAL(output[2], 1); |
| | ASSERT_EQUAL(output[3], 1); |
| | ASSERT_EQUAL(output[4], 1); |
| | ASSERT_EQUAL(output[5], 0); |
| | ASSERT_EQUAL(output[6], 1); |
| |
|
| | cudaStreamDestroy(s); |
| | } |
| | DECLARE_UNITTEST(TestForEachCudaStreams); |
| |
|
| |
|