| /* | |
| * Copyright 2021 NVIDIA Corporation | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| */ | |
| CUB_NAMESPACE_BEGIN | |
| namespace detail | |
| { | |
| namespace temporary_storage | |
| { | |
| class slot; | |
| template <typename T> | |
| class alias; | |
| template <int SlotsCount> | |
| class layout; | |
| /** | |
| * @brief Temporary storage slot that can be considered a C++ union with an | |
| * arbitrary fields count. | |
| * | |
| * @warning slot lifetime is defined by the lifetime of the associated layout. | |
| * It's impossible to request new array if layout is already mapped. | |
| * | |
| * @par A Simple Example | |
| * @code | |
| * auto slot = temporary_storage.get_slot(0); | |
| * | |
| * // Add fields into the slot | |
| * // Create an int alias with 0 elements: | |
| * auto int_array = slot->create_alias<int>(); | |
| * // Create a double alias with 2 elements: | |
| * auto double_array = slot->create_alias<double>(2); | |
| * // Create a char alias with 0 elements: | |
| * auto empty_array = slot->create_alias<char>(); | |
| * // Slot size is defined by double_array size (2 * sizeof(double)) | |
| * | |
| * if (condition) | |
| * { | |
| * int_array.grow(42); | |
| * // Now slot size is defined by int_array size (42 * sizeof(int)) | |
| * } | |
| * | |
| * // Temporary storage mapping | |
| * // ... | |
| * int *d_int_array = int_array.get(); | |
| * double *d_double_array = double_array.get(); | |
| * char *d_empty_array = empty_array.get(); // Guaranteed to return nullptr | |
| * @endcode | |
| */ | |
| class slot | |
| { | |
| std::size_t m_size{}; | |
| void *m_pointer{}; | |
| public: | |
| slot() = default; | |
| /** | |
| * @brief Returns an array of type @p T and length @p elements | |
| */ | |
| template <typename T> | |
| __host__ __device__ alias<T> create_alias(std::size_t elements = 0); | |
| private: | |
| __host__ __device__ void set_bytes_required(std::size_t new_size) | |
| { | |
| m_size = (max)(m_size, new_size); | |
| } | |
| __host__ __device__ std::size_t get_bytes_required() const | |
| { | |
| return m_size; | |
| } | |
| __host__ __device__ void set_storage(void *ptr) { m_pointer = ptr; } | |
| __host__ __device__ void *get_storage() const { return m_pointer; } | |
| template <typename T> | |
| friend class alias; | |
| template <int> | |
| friend class layout; | |
| }; | |
| /** | |
| * @brief Named memory region of a temporary storage slot | |
| * | |
| * @par Overview | |
| * This class provides a typed wrapper of a temporary slot memory region. | |
| * It can be considered as a field in the C++ union. It's only possible to | |
| * increase the array size. | |
| * | |
| * @warning alias lifetime is defined by the lifetime of the associated slot | |
| * It's impossible to grow the array if the layout is already mapped. | |
| */ | |
| template <typename T> | |
| class alias | |
| { | |
| slot &m_slot; | |
| std::size_t m_elements{}; | |
| __host__ __device__ explicit alias(slot &slot, | |
| std::size_t elements = 0) | |
| : m_slot(slot) | |
| , m_elements(elements) | |
| { | |
| this->update_slot(); | |
| } | |
| __host__ __device__ void update_slot() | |
| { | |
| m_slot.set_bytes_required(m_elements * sizeof(T)); | |
| } | |
| public: | |
| alias() = delete; | |
| /** | |
| * @brief Increases the number of elements | |
| * | |
| * @warning | |
| * This method should be called before temporary storage mapping stage. | |
| * | |
| * @param[in] new_elements Increases the memory region occupied in the | |
| * temporary slot to fit up to @p new_elements items | |
| * of type @p T. | |
| */ | |
| __host__ __device__ void grow(std::size_t new_elements) | |
| { | |
| m_elements = new_elements; | |
| this->update_slot(); | |
| } | |
| /** | |
| * @brief Returns pointer to array | |
| * | |
| * If the @p elements number is equal to zero, or storage layout isn't mapped, | |
| * @p nullptr is returned. | |
| */ | |
| __host__ __device__ T *get() const | |
| { | |
| if (m_elements == 0) | |
| { | |
| return nullptr; | |
| } | |
| return reinterpret_cast<T *>(m_slot.get_storage()); | |
| } | |
| friend class slot; | |
| }; | |
| template <typename T> | |
| __host__ __device__ alias<T> slot::create_alias(std::size_t elements) | |
| { | |
| return alias<T>(*this, elements); | |
| } | |
| /** | |
| * @brief Temporary storage layout represents a structure with | |
| * @p SlotsCount union-like fields | |
| * | |
| * The layout can be mapped to a temporary buffer only once. | |
| * | |
| * @par A Simple Example | |
| * @code | |
| * cub::detail::temporary_storage::layout<3> temporary_storage; | |
| * | |
| * auto slot_1 = temporary_storage.get_slot(0); | |
| * auto slot_2 = temporary_storage.get_slot(1); | |
| * | |
| * // Add fields into the first slot | |
| * auto int_array = slot_1->create_alias<int>(1); | |
| * auto double_array = slot_1->create_alias<double>(2); | |
| * | |
| * // Add fields into the second slot | |
| * auto char_array = slot_2->create_alias<char>(); | |
| * | |
| * // The equivalent C++ structure could look like | |
| * // struct StorageLayout | |
| * // { | |
| * // union { | |
| * // } slot_0; | |
| * // std::byte padding_0[256 - sizeof (slot_0)]; | |
| * // | |
| * // union { | |
| * // int alias_0[1]; | |
| * // double alias_1[2]; | |
| * // } slot_1; | |
| * // std::byte padding_1[256 - sizeof (slot_1)]; | |
| * // | |
| * // union { | |
| * // char alias_0[0]; | |
| * // } slot_2; | |
| * // std::byte padding_2[256 - sizeof (slot_2)]; | |
| * // }; | |
| * | |
| * // The third slot is empty | |
| * | |
| * // Temporary storage mapping | |
| * if (d_temp_storage == nullptr) | |
| * { | |
| * temp_storage_bytes = temporary_storage.get_size(); | |
| * return; | |
| * } | |
| * else | |
| * { | |
| * temporary_storage.map_to_buffer(d_temp_storage, temp_storage_bytes); | |
| * } | |
| * | |
| * // Use pointers | |
| * int *d_int_array = int_array.get(); | |
| * double *d_double_array = double_array.get(); | |
| * char *d_char_array = char_array.get(); | |
| * @endcode | |
| */ | |
| template <int SlotsCount> | |
| class layout | |
| { | |
| slot m_slots[SlotsCount]; | |
| std::size_t m_sizes[SlotsCount]; | |
| void *m_pointers[SlotsCount]; | |
| bool m_layout_was_mapped {}; | |
| public: | |
| layout() = default; | |
| __host__ __device__ slot *get_slot(int slot_id) | |
| { | |
| if (slot_id < SlotsCount) | |
| { | |
| return &m_slots[slot_id]; | |
| } | |
| return nullptr; | |
| } | |
| /** | |
| * @brief Returns required temporary storage size in bytes | |
| */ | |
| __host__ __device__ std::size_t get_size() | |
| { | |
| this->prepare_interface(); | |
| // AliasTemporaries can return error only in mapping stage, | |
| // so it's safe to ignore it here. | |
| std::size_t temp_storage_bytes{}; | |
| AliasTemporaries(nullptr, temp_storage_bytes, m_pointers, m_sizes); | |
| if (temp_storage_bytes == 0) | |
| { | |
| // The current CUB convention implies that there are two stages for each | |
| // device-scope function call. The first one returns the required storage | |
| // size. The second stage consumes temporary storage to perform some work. | |
| // The only way to distinguish between the two stages is by checking the | |
| // value of the temporary storage pointer. If zero bytes are requested, | |
| // `cudaMalloc` will return `nullptr`. This fact makes it impossible to | |
| // distinguish between the two stages, so we request some fixed amount of | |
| // bytes (even if we don't need it) to have a non-null temporary storage | |
| // pointer. | |
| return 1; | |
| } | |
| return temp_storage_bytes; | |
| } | |
| /** | |
| * @brief Maps the layout to the temporary storage buffer. | |
| */ | |
| __host__ __device__ cudaError_t map_to_buffer(void *d_temp_storage, | |
| std::size_t temp_storage_bytes) | |
| { | |
| if (m_layout_was_mapped) | |
| { | |
| return cudaErrorAlreadyMapped; | |
| } | |
| this->prepare_interface(); | |
| cudaError_t error = cudaSuccess; | |
| if ((error = AliasTemporaries(d_temp_storage, | |
| temp_storage_bytes, | |
| m_pointers, | |
| m_sizes))) | |
| { | |
| return error; | |
| } | |
| for (std::size_t slot_id = 0; slot_id < SlotsCount; slot_id++) | |
| { | |
| m_slots[slot_id].set_storage(m_pointers[slot_id]); | |
| } | |
| m_layout_was_mapped = true; | |
| return error; | |
| } | |
| private: | |
| __host__ __device__ void prepare_interface() | |
| { | |
| if (m_layout_was_mapped) | |
| { | |
| return; | |
| } | |
| for (std::size_t slot_id = 0; slot_id < SlotsCount; slot_id++) | |
| { | |
| const std::size_t slot_size = m_slots[slot_id].get_bytes_required(); | |
| m_sizes[slot_id] = slot_size; | |
| m_pointers[slot_id] = nullptr; | |
| } | |
| } | |
| }; | |
| } // namespace temporary_storage | |
| } // namespace detail | |
| CUB_NAMESPACE_END | |