| | #include <thrust/system/cuda/vector.h> |
| | #include <thrust/system/cuda/execution_policy.h> |
| | #include <thrust/host_vector.h> |
| | #include <thrust/generate.h> |
| | #include <thrust/sort.h> |
| | #include <thrust/pair.h> |
| | #include <cstdlib> |
| | #include <iostream> |
| | #include <sstream> |
| | #include <map> |
| | #include <cassert> |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
// Exception object thrown by cached_allocator::deallocate() when it is handed
// a pointer that is not present in its table of outstanding allocations.
//
// The diagnostic text is rendered once, at construction time, and stored so
// that what() can return a pointer that stays valid for the object's lifetime.
struct not_my_pointer
{
  // p: the offending pointer; only its address is printed, it is never
  //    dereferenced.
  not_my_pointer(void* p)
    : message(describe(p))
  {}

  virtual ~not_my_pointer() {}

  // Returns the stored, NUL-terminated description of the error.
  virtual const char* what() const
  {
    return message.c_str();
  }

private:
  // Formats the human-readable error text for the pointer p.
  static std::string describe(void* p)
  {
    std::stringstream text;
    text << "Pointer `" << p << "` was not allocated by this allocator.";
    return text.str();
  }

  std::string message;
};
| |
|
| | |
| | struct cached_allocator |
| | { |
| | typedef char value_type; |
| |
|
| | cached_allocator() {} |
| |
|
| | ~cached_allocator() |
| | { |
| | free_all(); |
| | } |
| |
|
| | char *allocate(std::ptrdiff_t num_bytes) |
| | { |
| | std::cout << "cached_allocator::allocate(): num_bytes == " |
| | << num_bytes |
| | << std::endl; |
| |
|
| | char *result = 0; |
| |
|
| | |
| | free_blocks_type::iterator free_block = free_blocks.find(num_bytes); |
| |
|
| | if (free_block != free_blocks.end()) |
| | { |
| | std::cout << "cached_allocator::allocate(): found a free block" |
| | << std::endl; |
| |
|
| | result = free_block->second; |
| |
|
| | |
| | free_blocks.erase(free_block); |
| | } |
| | else |
| | { |
| | |
| | |
| | try |
| | { |
| | std::cout << "cached_allocator::allocate(): allocating new block" |
| | << std::endl; |
| |
|
| | |
| | |
| | result = thrust::cuda::malloc<char>(num_bytes).get(); |
| | } |
| | catch (std::runtime_error&) |
| | { |
| | throw; |
| | } |
| | } |
| |
|
| | |
| | allocated_blocks.insert(std::make_pair(result, num_bytes)); |
| |
|
| | return result; |
| | } |
| |
|
| | void deallocate(char *ptr, size_t) |
| | { |
| | std::cout << "cached_allocator::deallocate(): ptr == " |
| | << reinterpret_cast<void*>(ptr) << std::endl; |
| |
|
| | |
| | allocated_blocks_type::iterator iter = allocated_blocks.find(ptr); |
| |
|
| | if (iter == allocated_blocks.end()) |
| | throw not_my_pointer(reinterpret_cast<void*>(ptr)); |
| |
|
| | std::ptrdiff_t num_bytes = iter->second; |
| | allocated_blocks.erase(iter); |
| |
|
| | |
| | free_blocks.insert(std::make_pair(num_bytes, ptr)); |
| | } |
| |
|
| | private: |
| | typedef std::multimap<std::ptrdiff_t, char*> free_blocks_type; |
| | typedef std::map<char*, std::ptrdiff_t> allocated_blocks_type; |
| |
|
| | free_blocks_type free_blocks; |
| | allocated_blocks_type allocated_blocks; |
| |
|
| | void free_all() |
| | { |
| | std::cout << "cached_allocator::free_all()" << std::endl; |
| |
|
| | |
| | for ( free_blocks_type::iterator i = free_blocks.begin() |
| | ; i != free_blocks.end() |
| | ; ++i) |
| | { |
| | |
| | thrust::cuda::free(thrust::cuda::pointer<char>(i->second)); |
| | } |
| |
|
| | for( allocated_blocks_type::iterator i = allocated_blocks.begin() |
| | ; i != allocated_blocks.end() |
| | ; ++i) |
| | { |
| | |
| | thrust::cuda::free(thrust::cuda::pointer<char>(i->first)); |
| | } |
| | } |
| | }; |
| |
|
| | int main() |
| | { |
| | std::size_t num_elements = 32768; |
| |
|
| | thrust::host_vector<int> h_input(num_elements); |
| |
|
| | |
| | thrust::generate(h_input.begin(), h_input.end(), rand); |
| |
|
| | thrust::cuda::vector<int> d_input = h_input; |
| | thrust::cuda::vector<int> d_result(num_elements); |
| |
|
| | std::size_t num_trials = 5; |
| |
|
| | cached_allocator alloc; |
| |
|
| | for (std::size_t i = 0; i < num_trials; ++i) |
| | { |
| | d_result = d_input; |
| |
|
| | |
| | |
| | thrust::sort(thrust::cuda::par(alloc), d_result.begin(), d_result.end()); |
| |
|
| | |
| | assert(thrust::is_sorted(d_result.begin(), d_result.end())); |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
| |
|