Spaces:
Runtime error
Runtime error
| /****************************************************************************** | |
| * Copyright (c) 2011, Duane Merrill. All rights reserved. | |
| * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions are met: | |
| * * Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * * Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in the | |
| * documentation and/or other materials provided with the distribution. | |
| * * Neither the name of the NVIDIA CORPORATION nor the | |
| * names of its contributors may be used to endorse or promote products | |
| * derived from this software without specific prior written permission. | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY | |
| * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * | |
| ******************************************************************************/ | |
| /****************************************************************************** | |
| * Test of BlockHistogram utilities | |
| ******************************************************************************/ | |
| // Ensure printing of CUDA runtime errors to console | |
| #define CUB_STDERR | |
| #include <stdio.h> | |
| #include <limits> | |
| #include <string> | |
| #include <typeinfo> | |
| #include <cub/block/block_histogram.cuh> | |
| #include <cub/block/block_load.cuh> | |
| #include <cub/block/block_store.cuh> | |
| #include <cub/util_allocator.cuh> | |
| #include "test_util.h" | |
| using namespace cub; | |
| //--------------------------------------------------------------------- | |
| // Globals, constants and typedefs | |
| //--------------------------------------------------------------------- | |
| bool g_verbose = false; | |
| int g_timing_iterations = 0; | |
| int g_repeat = 0; | |
| CachingDeviceAllocator g_allocator(true); | |
| //--------------------------------------------------------------------- | |
| // Test kernels | |
| //--------------------------------------------------------------------- | |
| /** | |
| * BlockHistogram test kernel. | |
| */ | |
| template < | |
| int BINS, | |
| int BLOCK_THREADS, | |
| int ITEMS_PER_THREAD, | |
| BlockHistogramAlgorithm ALGORITHM, | |
| typename T, | |
| typename HistoCounter> | |
| __global__ void BlockHistogramKernel( | |
| T *d_samples, | |
| HistoCounter *d_histogram) | |
| { | |
| // Parameterize BlockHistogram type for our thread block | |
| typedef BlockHistogram<T, BLOCK_THREADS, ITEMS_PER_THREAD, BINS, ALGORITHM> BlockHistogram; | |
| // Allocate temp storage in shared memory | |
| __shared__ typename BlockHistogram::TempStorage temp_storage; | |
| // Per-thread tile data | |
| T data[ITEMS_PER_THREAD]; | |
| LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_samples, data); | |
| // Test histo (writing directly to histogram buffer in global) | |
| BlockHistogram(temp_storage).Histogram(data, d_histogram); | |
| } | |
| /** | |
| * Initialize problem (and solution) | |
| */ | |
| template < | |
| int BINS, | |
| typename SampleT> | |
| void Initialize( | |
| GenMode gen_mode, | |
| SampleT *h_samples, | |
| int *h_histograms_linear, | |
| int num_samples) | |
| { | |
| // Init bins | |
| for (int bin = 0; bin < BINS; ++bin) | |
| { | |
| h_histograms_linear[bin] = 0; | |
| } | |
| if (g_verbose) printf("Samples: \n"); | |
| // Initialize interleaved channel samples and histogram them correspondingly | |
| for (int i = 0; i < num_samples; ++i) | |
| { | |
| InitValue(gen_mode, h_samples[i], i); | |
| h_samples[i] %= BINS; | |
| if (g_verbose) std::cout << CoutCast(h_samples[i]) << ", "; | |
| h_histograms_linear[h_samples[i]]++; | |
| } | |
| if (g_verbose) printf("\n\n"); | |
| } | |
| /** | |
| * Test BlockHistogram | |
| */ | |
| template < | |
| typename SampleT, | |
| int BINS, | |
| int BLOCK_THREADS, | |
| int ITEMS_PER_THREAD, | |
| BlockHistogramAlgorithm ALGORITHM> | |
| void Test( | |
| GenMode gen_mode) | |
| { | |
| int num_samples = BLOCK_THREADS * ITEMS_PER_THREAD; | |
| printf("cub::BlockHistogram %s %d %s samples (%dB), %d bins, %d threads, gen-mode %s\n", | |
| (ALGORITHM == BLOCK_HISTO_SORT) ? "BLOCK_HISTO_SORT" : "BLOCK_HISTO_ATOMIC", | |
| num_samples, | |
| typeid(SampleT).name(), | |
| (int) sizeof(SampleT), | |
| BINS, | |
| BLOCK_THREADS, | |
| (gen_mode == RANDOM) ? "RANDOM" : (gen_mode == INTEGER_SEED) ? "SEQUENTIAL" : "HOMOGENOUS"); | |
| fflush(stdout); | |
| // Allocate host arrays | |
| SampleT *h_samples = new SampleT[num_samples]; | |
| int *h_reference = new int[BINS]; | |
| // Initialize problem | |
| Initialize<BINS>(gen_mode, h_samples, h_reference, num_samples); | |
| // Allocate problem device arrays | |
| SampleT *d_samples = NULL; | |
| int *d_histogram = NULL; | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_samples, sizeof(SampleT) * num_samples)); | |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_histogram, sizeof(int) * BINS)); | |
| // Initialize/clear device arrays | |
| CubDebugExit(cudaMemcpy(d_samples, h_samples, sizeof(SampleT) * num_samples, cudaMemcpyHostToDevice)); | |
| CubDebugExit(cudaMemset(d_histogram, 0, sizeof(int) * BINS)); | |
| // Run kernel | |
| BlockHistogramKernel<BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM><<<1, BLOCK_THREADS>>>( | |
| d_samples, | |
| d_histogram); | |
| // Check for correctness (and display results, if specified) | |
| int compare = CompareDeviceResults((int*) h_reference, d_histogram, BINS, g_verbose, g_verbose); | |
| printf("\t%s\n\n", compare ? "FAIL" : "PASS"); | |
| // Flush any stdout/stderr | |
| CubDebugExit(cudaPeekAtLastError()); | |
| CubDebugExit(cudaDeviceSynchronize()); | |
| fflush(stdout); | |
| fflush(stderr); | |
| // Cleanup | |
| if (h_samples) delete[] h_samples; | |
| if (h_reference) delete[] h_reference; | |
| if (d_samples) CubDebugExit(g_allocator.DeviceFree(d_samples)); | |
| if (d_histogram) CubDebugExit(g_allocator.DeviceFree(d_histogram)); | |
| // Correctness asserts | |
| AssertEquals(0, compare); | |
| } | |
| /** | |
| * Test different sample distributions | |
| */ | |
| template < | |
| typename SampleT, | |
| int BINS, | |
| int BLOCK_THREADS, | |
| int ITEMS_PER_THREAD, | |
| BlockHistogramAlgorithm ALGORITHM> | |
| void Test() | |
| { | |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(UNIFORM); | |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(INTEGER_SEED); | |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(RANDOM); | |
| } | |
| /** | |
| * Test different ALGORITHM | |
| */ | |
| template < | |
| typename SampleT, | |
| int BINS, | |
| int BLOCK_THREADS, | |
| int ITEMS_PER_THREAD> | |
| void Test() | |
| { | |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, BLOCK_HISTO_SORT>(); | |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, BLOCK_HISTO_ATOMIC>(); | |
| } | |
| /** | |
| * Test different ITEMS_PER_THREAD | |
| */ | |
| template < | |
| typename SampleT, | |
| int BINS, | |
| int BLOCK_THREADS> | |
| void Test() | |
| { | |
| Test<SampleT, BINS, BLOCK_THREADS, 1>(); | |
| Test<SampleT, BINS, BLOCK_THREADS, 5>(); | |
| } | |
| /** | |
| * Test different BLOCK_THREADS | |
| */ | |
| template < | |
| typename SampleT, | |
| int BINS> | |
| void Test() | |
| { | |
| Test<SampleT, BINS, 32>(); | |
| Test<SampleT, BINS, 96>(); | |
| Test<SampleT, BINS, 128>(); | |
| } | |
| //--------------------------------------------------------------------- | |
| // Main | |
| //--------------------------------------------------------------------- | |
| /** | |
| * Main | |
| */ | |
| int main(int argc, char** argv) | |
| { | |
| // Initialize command line | |
| CommandLineArgs args(argc, argv); | |
| g_verbose = args.CheckCmdLineFlag("v"); | |
| args.GetCmdLineArgument("repeat", g_repeat); | |
| // Print usage | |
| if (args.CheckCmdLineFlag("help")) | |
| { | |
| printf("%s " | |
| "[--n=<total input samples across all channels> " | |
| "[--device=<device-id>] " | |
| "[--repeat=<repetitions of entire test suite>]" | |
| "[--v] " | |
| "\n", argv[0]); | |
| exit(0); | |
| } | |
| // Initialize device | |
| CubDebugExit(args.DeviceInit()); | |
| #ifdef QUICK_TEST | |
| // Compile/run quick tests | |
| Test<unsigned char, 256, 128, 4, BLOCK_HISTO_SORT>(RANDOM); | |
| Test<unsigned char, 256, 128, 4, BLOCK_HISTO_ATOMIC>(RANDOM); | |
| #else | |
| // Compile/run thorough tests | |
| for (int i = 0; i <= g_repeat; ++i) | |
| { | |
| Test<unsigned char, 32>(); | |
| Test<unsigned char, 256>(); | |
| Test<unsigned short, 1024>(); | |
| } | |
| #endif | |
| return 0; | |
| } | |