| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifndef NCNN_ALLOCATOR_H |
| | #define NCNN_ALLOCATOR_H |
| |
|
| | #ifdef _WIN32 |
| | #define WIN32_LEAN_AND_MEAN |
| | #include <windows.h> |
| | #endif |
| |
|
| | #include "platform.h" |
| |
|
| | #include <stdlib.h> |
| |
|
| | #if NCNN_VULKAN |
| | #include <vulkan/vulkan.h> |
| | #endif |
| |
|
| | #if NCNN_PLATFORM_API |
| | #if __ANDROID_API__ >= 26 |
| | #include <android/hardware_buffer.h> |
| | #endif |
| | #endif |
| |
|
| | namespace ncnn { |
| |
|
| | |
// Alignment (in bytes) of every buffer returned by fastMalloc().
// Wider SIMD ISAs need wider alignment for aligned vector loads/stores.
#if NCNN_AVX512
#define NCNN_MALLOC_ALIGN 64
#elif NCNN_AVX
#define NCNN_MALLOC_ALIGN 32
#else
#define NCNN_MALLOC_ALIGN 16
#endif

// Extra tail bytes reserved past the end of every allocation so that
// optimized kernels may safely over-read a little beyond the buffer.
#define NCNN_MALLOC_OVERREAD 64
| |
|
| | |
| | |
| | |
// Round ptr up to the next multiple of n bytes.
// n must be a power of two; it defaults to the size of the pointed-to type.
template<typename _Tp>
static NCNN_FORCEINLINE _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
{
    const size_t mask = (size_t)n - 1;
    return (_Tp*)(((size_t)ptr + mask) & ~mask);
}
| |
|
| | |
| | |
| | |
| | |
// Round sz up to the next multiple of n (n must be a power of two).
static NCNN_FORCEINLINE size_t alignSize(size_t sz, int n)
{
    const size_t mask = (size_t)n - 1;
    return (sz + mask) & ~mask;
}
| |
|
// Allocate size bytes aligned to NCNN_MALLOC_ALIGN.
// Every path reserves NCNN_MALLOC_OVERREAD extra tail bytes so kernels may
// safely read slightly past the end of the buffer.
// Returns 0 on failure. The result must be released with fastFree() only.
static NCNN_FORCEINLINE void* fastMalloc(size_t size)
{
#if _MSC_VER
    // fix: include the overread tail here too — the other branches all
    // over-allocate, and without it MSVC builds lose the over-read guarantee
    return _aligned_malloc(size + NCNN_MALLOC_OVERREAD, NCNN_MALLOC_ALIGN);
#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
    void* ptr = 0;
    if (posix_memalign(&ptr, NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD))
        ptr = 0; // posix_memalign leaves ptr unspecified on failure
    return ptr;
#elif __ANDROID__ && __ANDROID_API__ < 17
    // old android NDKs lack posix_memalign; memalign'd memory may be free()d
    return memalign(NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD);
#else
    // generic fallback: over-allocate with malloc, align manually, and stash
    // the raw pointer just before the aligned block so fastFree() can find it
    unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD);
    if (!udata)
        return 0;
    unsigned char** adata = alignPtr((unsigned char**)udata + 1, NCNN_MALLOC_ALIGN);
    adata[-1] = udata;
    return adata;
#endif
}
| |
|
// Release a pointer previously obtained from fastMalloc().
// A null pointer is a no-op. The branch taken must match the one used in
// fastMalloc(), which the identical preprocessor conditions guarantee.
static NCNN_FORCEINLINE void fastFree(void* ptr)
{
    if (ptr)
    {
#if _MSC_VER
        _aligned_free(ptr);
#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
        free(ptr);
#elif __ANDROID__ && __ANDROID_API__ < 17
        free(ptr);
#else
        // recover the raw malloc() pointer stashed just before the aligned block
        unsigned char* udata = ((unsigned char**)ptr)[-1];
        free(udata);
#endif
    }
}
| |
|
#if NCNN_THREADS
// NCNN_XADD(addr, delta): atomically fetch *addr then add delta to it,
// returning the old value. Used for reference counting.
#if defined __riscv && !defined __riscv_atomic
// riscv without the A (atomic) extension: plain read-modify-write fallback
static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
// Intel compiler on non-Windows exposes _InterlockedExchangeAdd as an intrinsic
#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
#elif defined __GNUC__
#if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
#ifdef __ATOMIC_ACQ_REL
#define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
#else
// 4 is the numeric value of __ATOMIC_ACQ_REL when the macro is unavailable
#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
#endif
#else
#if defined __ATOMIC_ACQ_REL && !defined __clang__
// unsigned arithmetic avoids signed-overflow UB on wraparound
#define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
#else
// legacy gcc builtin, full barrier semantics
#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
#endif
#endif
#elif defined _MSC_VER && !defined RC_INVOKED
#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
// unknown compiler: non-atomic fallback — NOTE(review): not thread-safe
static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#endif
#else
// single-threaded build: atomicity is unnecessary
static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#endif
| |
|
// Abstract base class for CPU memory allocators.
// Concrete implementations pair fastMalloc with the matching fastFree.
class NCNN_EXPORT Allocator
{
public:
    virtual ~Allocator();
    // allocate size bytes
    virtual void* fastMalloc(size_t size) = 0;
    // release a pointer previously returned by this allocator's fastMalloc
    virtual void fastFree(void* ptr) = 0;
};
| |
|
class PoolAllocatorPrivate;
// Pooling allocator: freed buffers are cached and recycled for later
// requests of similar size (pimpl — details in PoolAllocatorPrivate).
// NOTE(review): the name contrast with UnlockedPoolAllocator suggests this
// variant is internally locked for thread safety — confirm in allocator.cpp.
class NCNN_EXPORT PoolAllocator : public Allocator
{
public:
    PoolAllocator();
    ~PoolAllocator();

    // NOTE(review): scr is presumably a 0..1 ratio controlling how closely a
    // cached buffer's size must match a request to be reused — confirm.
    void set_size_compare_ratio(float scr);

    // NOTE(review): presumably allocations beyond this size are not retained
    // in the pool when freed — confirm.
    void set_size_drop_threshold(size_t);

    // release all buffers cached by the pool
    void clear();

    virtual void* fastMalloc(size_t size);
    virtual void fastFree(void* ptr);

private:
    // non-copyable
    PoolAllocator(const PoolAllocator&);
    PoolAllocator& operator=(const PoolAllocator&);

private:
    PoolAllocatorPrivate* const d;
};
| |
|
class UnlockedPoolAllocatorPrivate;
// Pooling allocator without internal locking (per the class name) — for use
// when the caller guarantees single-threaded access to the pool.
class NCNN_EXPORT UnlockedPoolAllocator : public Allocator
{
public:
    UnlockedPoolAllocator();
    ~UnlockedPoolAllocator();

    // NOTE(review): scr is presumably a 0..1 ratio controlling how closely a
    // cached buffer's size must match a request to be reused — confirm.
    void set_size_compare_ratio(float scr);

    // NOTE(review): presumably allocations beyond this size are not retained
    // in the pool when freed — confirm.
    void set_size_drop_threshold(size_t);

    // release all buffers cached by the pool
    void clear();

    virtual void* fastMalloc(size_t size);
    virtual void fastFree(void* ptr);

private:
    // non-copyable
    UnlockedPoolAllocator(const UnlockedPoolAllocator&);
    UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&);

private:
    UnlockedPoolAllocatorPrivate* const d;
};
| |
|
| | #if NCNN_VULKAN |
| |
|
| | class VulkanDevice; |
| |
|
// A vulkan buffer allocation: the VkBuffer handle together with its backing
// device memory and tracked synchronization state.
class NCNN_EXPORT VkBufferMemory
{
public:
    VkBuffer buffer;

    // position and size of this allocation within its memory block
    size_t offset;
    size_t capacity;

    VkDeviceMemory memory;
    // host-visible mapping base — NOTE(review): presumably 0 when the memory
    // is not mappable; confirm in the allocator implementations
    void* mapped_ptr;

    // last-recorded access/stage state; mutable so const holders can update it
    mutable VkAccessFlags access_flags;
    mutable VkPipelineStageFlags stage_flags;

    // shared-ownership reference count
    int refcount;
};
| |
|
// A vulkan image allocation: VkImage + VkImageView with backing device
// memory and tracked layout/synchronization state.
class NCNN_EXPORT VkImageMemory
{
public:
    VkImage image;
    VkImageView imageview;

    // dimensions and texel format the image was created with
    int width;
    int height;
    int depth;
    VkFormat format;

    VkDeviceMemory memory;
    // host-visible mapping base — NOTE(review): presumably 0 when the memory
    // is not mappable; confirm in the allocator implementations
    void* mapped_ptr;

    // where this image is bound inside its memory block
    size_t bind_offset;
    size_t bind_capacity;

    // last-recorded access/layout/stage state; mutable so const holders can
    // update it while recording commands
    mutable VkAccessFlags access_flags;
    mutable VkImageLayout image_layout;
    mutable VkPipelineStageFlags stage_flags;

    // NOTE(review): name suggests references held by recorded command
    // buffers, separate from the ownership refcount below — confirm
    mutable int command_refcount;

    // shared-ownership reference count
    int refcount;
};
| |
|
// Abstract base for gpu memory allocators bound to a VulkanDevice.
// Declares the buffer and image allocation interfaces and provides shared
// helpers for creating vulkan objects and allocating device memory.
class NCNN_EXPORT VkAllocator
{
public:
    explicit VkAllocator(const VulkanDevice* _vkdev);
    virtual ~VkAllocator();

    // drop any cached allocations held by the allocator
    virtual void clear();

    // buffer interface
    virtual VkBufferMemory* fastMalloc(size_t size) = 0;
    virtual void fastFree(VkBufferMemory* ptr) = 0;
    // flush / invalidate mapped memory — NOTE(review): presumably needed only
    // for non-coherent memory; confirm against allocator.cpp
    virtual int flush(VkBufferMemory* ptr);
    virtual int invalidate(VkBufferMemory* ptr);

    // image interface
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0;
    virtual void fastFree(VkImageMemory* ptr) = 0;

public:
    const VulkanDevice* vkdev;
    // selected vulkan memory type indices for buffer / image / reserved use
    uint32_t buffer_memory_type_index;
    uint32_t image_memory_type_index;
    uint32_t reserved_type_index;
    // properties of the selected memory type
    bool mappable;
    bool coherent;

protected:
    // helpers wrapping vulkan object creation and memory allocation
    VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage);
    VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index);
    VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer);

    VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage);
    VkImageView create_imageview(VkImage image, VkFormat format);
};
| |
|
class VkBlobAllocatorPrivate;
// Gpu allocator for blob (intermediate tensor) memory.
// NOTE(review): preferred_block_size presumably sets the granularity of the
// larger memory blocks that individual allocations are carved from — confirm.
class NCNN_EXPORT VkBlobAllocator : public VkAllocator
{
public:
    explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024);
    virtual ~VkBlobAllocator();

public:
    // release all cached blocks
    virtual void clear();

    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkBlobAllocator(const VkBlobAllocator&);
    VkBlobAllocator& operator=(const VkBlobAllocator&);

private:
    VkBlobAllocatorPrivate* const d;
};
| |
|
class VkWeightAllocatorPrivate;
// Gpu allocator for model weight memory (long-lived, write-once data).
// NOTE(review): preferred_block_size presumably sets the granularity of the
// larger memory blocks that individual allocations are carved from — confirm.
class NCNN_EXPORT VkWeightAllocator : public VkAllocator
{
public:
    explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024);
    virtual ~VkWeightAllocator();

public:
    // release all cached blocks
    virtual void clear();

public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkWeightAllocator(const VkWeightAllocator&);
    VkWeightAllocator& operator=(const VkWeightAllocator&);

private:
    VkWeightAllocatorPrivate* const d;
};
| |
|
class VkStagingAllocatorPrivate;
// Gpu allocator for staging memory used in host<->device transfers.
class NCNN_EXPORT VkStagingAllocator : public VkAllocator
{
public:
    explicit VkStagingAllocator(const VulkanDevice* vkdev);
    virtual ~VkStagingAllocator();

public:
    // NOTE(review): scr is presumably a 0..1 ratio controlling how closely a
    // cached buffer's size must match a request to be reused — confirm.
    void set_size_compare_ratio(float scr);

    // release all cached staging buffers
    virtual void clear();

    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkStagingAllocator(const VkStagingAllocator&);
    VkStagingAllocator& operator=(const VkStagingAllocator&);

private:
    VkStagingAllocatorPrivate* const d;
};
| |
|
class VkWeightStagingAllocatorPrivate;
// Gpu allocator for staging memory used while uploading model weights.
class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator
{
public:
    explicit VkWeightStagingAllocator(const VulkanDevice* vkdev);
    virtual ~VkWeightStagingAllocator();

public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkWeightStagingAllocator(const VkWeightStagingAllocator&);
    VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&);

private:
    VkWeightStagingAllocatorPrivate* const d;
};
| |
|
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
// Allocator backed by an android AHardwareBuffer, exposing it to vulkan as
// an image (e.g. for zero-copy access to camera / media buffers).
class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator
{
public:
    VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb);
    virtual ~VkAndroidHardwareBufferImageAllocator();

public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&);
    VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&);

public:
    // query properties of the wrapped hardware buffer
    // NOTE(review): presumably must be called before allocating — confirm
    int init();

    // dimensions / external format of the wrapped buffer
    int width() const;
    int height() const;
    uint64_t external_format() const;

public:
    AHardwareBuffer* hb;
    AHardwareBuffer_Desc bufferDesc;
    VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties;
    VkAndroidHardwareBufferPropertiesANDROID bufferProperties;
    VkSamplerYcbcrConversionKHR samplerYcbcrConversion;
};
#endif
#endif
| |
|
| | #endif |
| |
|
| | } |
| |
|
| | #endif |
| |
|