| /****************************************************************************** |
| * Copyright (c) 2011, Duane Merrill. All rights reserved. |
| * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * * Neither the name of the NVIDIA CORPORATION nor the |
| * names of its contributors may be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY |
| * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| * |
| ******************************************************************************/ |
| |
| /****************************************************************************** |
| * Test of BlockHistogram utilities |
| ******************************************************************************/ |
| |
| // Ensure printing of CUDA runtime errors to console |
| #define CUB_STDERR |
| |
| #include <stdio.h> |
| #include <limits> |
| #include <string> |
| #include <typeinfo> |
| |
| #include <cub/block/block_histogram.cuh> |
| #include <cub/block/block_load.cuh> |
| #include <cub/block/block_store.cuh> |
| #include <cub/util_allocator.cuh> |
| |
| #include "test_util.h" |
| |
| using namespace cub; |
| |
| |
| //--------------------------------------------------------------------- |
| // Globals, constants and typedefs |
| //--------------------------------------------------------------------- |
| |
| bool g_verbose = false; |
| int g_timing_iterations = 0; |
| int g_repeat = 0; |
| CachingDeviceAllocator g_allocator(true); |
| |
| |
| //--------------------------------------------------------------------- |
| // Test kernels |
| //--------------------------------------------------------------------- |
| |
| /** |
| * BlockHistogram test kernel. |
| */ |
| template < |
| int BINS, |
| int BLOCK_THREADS, |
| int ITEMS_PER_THREAD, |
| BlockHistogramAlgorithm ALGORITHM, |
| typename T, |
| typename HistoCounter> |
| __global__ void BlockHistogramKernel( |
| T *d_samples, |
| HistoCounter *d_histogram) |
| { |
| // Parameterize BlockHistogram type for our thread block |
| typedef BlockHistogram<T, BLOCK_THREADS, ITEMS_PER_THREAD, BINS, ALGORITHM> BlockHistogram; |
| |
| // Allocate temp storage in shared memory |
| __shared__ typename BlockHistogram::TempStorage temp_storage; |
| |
| // Per-thread tile data |
| T data[ITEMS_PER_THREAD]; |
| LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_samples, data); |
| |
| // Test histo (writing directly to histogram buffer in global) |
| BlockHistogram(temp_storage).Histogram(data, d_histogram); |
| } |
| |
| |
| /** |
| * Initialize problem (and solution) |
| */ |
| template < |
| int BINS, |
| typename SampleT> |
| void Initialize( |
| GenMode gen_mode, |
| SampleT *h_samples, |
| int *h_histograms_linear, |
| int num_samples) |
| { |
| // Init bins |
| for (int bin = 0; bin < BINS; ++bin) |
| { |
| h_histograms_linear[bin] = 0; |
| } |
| |
| if (g_verbose) printf("Samples: \n"); |
| |
| // Initialize interleaved channel samples and histogram them correspondingly |
| for (int i = 0; i < num_samples; ++i) |
| { |
| InitValue(gen_mode, h_samples[i], i); |
| h_samples[i] %= BINS; |
| |
| if (g_verbose) std::cout << CoutCast(h_samples[i]) << ", "; |
| |
| h_histograms_linear[h_samples[i]]++; |
| } |
| |
| if (g_verbose) printf("\n\n"); |
| } |
| |
|
|
| /** |
| * Test BlockHistogram |
| */ |
| template < |
| typename SampleT, |
| int BINS, |
| int BLOCK_THREADS, |
| int ITEMS_PER_THREAD, |
| BlockHistogramAlgorithm ALGORITHM> |
| void Test( |
| GenMode gen_mode) |
| { |
| int num_samples = BLOCK_THREADS * ITEMS_PER_THREAD; |
|
|
| printf("cub::BlockHistogram %s %d %s samples (%dB), %d bins, %d threads, gen-mode %s\n", |
| (ALGORITHM == BLOCK_HISTO_SORT) ? "BLOCK_HISTO_SORT" : "BLOCK_HISTO_ATOMIC", |
| num_samples, |
| typeid(SampleT).name(), |
| (int) sizeof(SampleT), |
| BINS, |
| BLOCK_THREADS, |
| (gen_mode == RANDOM) ? "RANDOM" : (gen_mode == INTEGER_SEED) ? "SEQUENTIAL" : "HOMOGENOUS"); |
| fflush(stdout); |
| |
| // Allocate host arrays |
| SampleT *h_samples = new SampleT[num_samples]; |
| int *h_reference = new int[BINS]; |
| |
| // Initialize problem |
| Initialize<BINS>(gen_mode, h_samples, h_reference, num_samples); |
| |
| // Allocate problem device arrays |
| SampleT *d_samples = NULL; |
| int *d_histogram = NULL; |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_samples, sizeof(SampleT) * num_samples)); |
| CubDebugExit(g_allocator.DeviceAllocate((void**)&d_histogram, sizeof(int) * BINS)); |
| |
| // Initialize/clear device arrays |
| CubDebugExit(cudaMemcpy(d_samples, h_samples, sizeof(SampleT) * num_samples, cudaMemcpyHostToDevice)); |
| CubDebugExit(cudaMemset(d_histogram, 0, sizeof(int) * BINS)); |
| |
| // Run kernel |
| BlockHistogramKernel<BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM><<<1, BLOCK_THREADS>>>( |
| d_samples, |
| d_histogram); |
| |
| // Check for correctness (and display results, if specified) |
| int compare = CompareDeviceResults((int*) h_reference, d_histogram, BINS, g_verbose, g_verbose); |
| printf("\t%s\n\n", compare ? "FAIL" : "PASS"); |
| |
| // Flush any stdout/stderr |
| CubDebugExit(cudaPeekAtLastError()); |
| CubDebugExit(cudaDeviceSynchronize()); |
| fflush(stdout); |
| fflush(stderr); |
| |
| // Cleanup |
| if (h_samples) delete[] h_samples; |
| if (h_reference) delete[] h_reference; |
| if (d_samples) CubDebugExit(g_allocator.DeviceFree(d_samples)); |
| if (d_histogram) CubDebugExit(g_allocator.DeviceFree(d_histogram)); |
| |
| // Correctness asserts |
| AssertEquals(0, compare); |
| } |
| |
|
|
| /** |
| * Test different sample distributions |
| */ |
| template < |
| typename SampleT, |
| int BINS, |
| int BLOCK_THREADS, |
| int ITEMS_PER_THREAD, |
| BlockHistogramAlgorithm ALGORITHM> |
| void Test() |
| { |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(UNIFORM); |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(INTEGER_SEED); |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(RANDOM); |
| } |
| |
| |
| /** |
| * Test different ALGORITHM |
| */ |
| template < |
| typename SampleT, |
| int BINS, |
| int BLOCK_THREADS, |
| int ITEMS_PER_THREAD> |
| void Test() |
| { |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, BLOCK_HISTO_SORT>(); |
| Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, BLOCK_HISTO_ATOMIC>(); |
| } |
| |
| |
| /** |
| * Test different ITEMS_PER_THREAD |
| */ |
| template < |
| typename SampleT, |
| int BINS, |
| int BLOCK_THREADS> |
| void Test() |
| { |
| Test<SampleT, BINS, BLOCK_THREADS, 1>(); |
| Test<SampleT, BINS, BLOCK_THREADS, 5>(); |
| } |
| |
| |
| /** |
| * Test different BLOCK_THREADS |
| */ |
| template < |
| typename SampleT, |
| int BINS> |
| void Test() |
| { |
| Test<SampleT, BINS, 32>(); |
| Test<SampleT, BINS, 96>(); |
| Test<SampleT, BINS, 128>(); |
| } |
| |
| |
| |
| |
| |
| //--------------------------------------------------------------------- |
| // Main |
| //--------------------------------------------------------------------- |
| |
| /** |
| * Main |
| */ |
| int main(int argc, char** argv) |
| { |
| // Initialize command line |
| CommandLineArgs args(argc, argv); |
| g_verbose = args.CheckCmdLineFlag("v"); |
| args.GetCmdLineArgument("repeat", g_repeat); |
| |
| // Print usage |
| if (args.CheckCmdLineFlag("help")) |
| { |
| printf("%s " |
| "[--n=<total input samples across all channels> " |
| "[--device=<device-id>] " |
| "[--repeat=<repetitions of entire test suite>]" |
| "[--v] " |
| "\n", argv[0]); |
| exit(0); |
| } |
| |
| // Initialize device |
| CubDebugExit(args.DeviceInit()); |
| |
| #ifdef QUICK_TEST |
| |
| // Compile/run quick tests |
| Test<unsigned char, 256, 128, 4, BLOCK_HISTO_SORT>(RANDOM); |
| Test<unsigned char, 256, 128, 4, BLOCK_HISTO_ATOMIC>(RANDOM); |
| |
| #else |
| |
| // Compile/run thorough tests |
| for (int i = 0; i <= g_repeat; ++i) |
| { |
| Test<unsigned char, 32>(); |
| Test<unsigned char, 256>(); |
| Test<unsigned short, 1024>(); |
| } |
| |
| #endif |
|
|
| return 0; |
| } |
| |
|
|
|
|
|
|