first commit

c206440 over 3 years ago

8.26 kB

	// Copyright (c) Microsoft Corporation. All rights reserved.
	// Licensed under the MIT License.

	#pragma once

	#include "core/common/common.h"
	#include "core/framework/allocator_stats.h"
	#include "core/session/onnxruntime_c_api.h"
	#include "ortdevice.h"
	#include "ortmemoryinfo.h"

	// This configures the arena based allocator used by ORT
	// See docs/C_API.md for details on what these mean and how to choose these values
	struct OrtArenaCfg {
	OrtArenaCfg() : max_mem(0),
	arena_extend_strategy(-1),
	initial_chunk_size_bytes(-1),
	max_dead_bytes_per_chunk(-1),
	initial_growth_chunk_size_bytes(-1) {}
	OrtArenaCfg(size_t max_mem, int arena_extend_strategy, int initial_chunk_size_bytes,
	int max_dead_bytes_per_chunk, int initial_growth_chunk_size_bytes)
	: max_mem(max_mem),
	arena_extend_strategy(arena_extend_strategy),
	initial_chunk_size_bytes(initial_chunk_size_bytes),
	max_dead_bytes_per_chunk(max_dead_bytes_per_chunk),
	initial_growth_chunk_size_bytes(initial_growth_chunk_size_bytes) {}

	size_t max_mem; // use 0 to allow ORT to choose the default
	int arena_extend_strategy; // use -1 to allow ORT to choose the default, 0 = kNextPowerOfTwo, 1 = kSameAsRequested
	int initial_chunk_size_bytes; // use -1 to allow ORT to choose the default
	int max_dead_bytes_per_chunk; // use -1 to allow ORT to choose the default
	int initial_growth_chunk_size_bytes; // use -1 to allow ORT to choose the default
	};

	namespace onnxruntime {
	constexpr const char* CPU = "Cpu";
	constexpr const char* CUDA = "Cuda";
	constexpr const char* CUDA_PINNED = "CudaPinned";
	constexpr const char* CANN = "Cann";
	constexpr const char* CANN_PINNED = "CannPinned";
	constexpr const char* DML = "DML";
	constexpr const char* HIP = "Hip";
	constexpr const char* HIP_PINNED = "HipPinned";
	constexpr const char* OpenVINO_CPU = "OpenVINO_CPU";
	constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";

	constexpr size_t kAllocAlignment = 256;

	class IAllocator;
	class Stream;
	namespace synchronize {
	class Notification;
	}
	using WaitNotificationFn = std::function<void(Stream&, synchronize::Notification&)>;
	void* AllocateBufferWithOptions(IAllocator& allocator, size_t size, bool use_reserve, Stream* stream, WaitNotificationFn wait_fn);

	template <typename T>
	using IAllocatorUniquePtr = std::unique_ptr<T, std::function<void(T*)>>;

	class IAllocator {
	public:
	IAllocator(const OrtMemoryInfo& info) : memory_info_(info) {}
	virtual ~IAllocator() = default;
	/**
	@remarks Use SafeInt when calculating the size of memory to allocate using Alloc.
	*/
	virtual void* Alloc(size_t size) = 0;

	virtual void Free(void* p) = 0;

	// TODO: Find a better name than Reserve() and update in all places.
	// Reserve() is an interface exposed for an implementation of IAllocator
	// to optionally implement some allocation logic that by-passes any arena-based
	// logic that may be housed in the Alloc() implementation.
	// There are SessionOptions config(s) that allow users to allocate some memory
	// by-passing arena-based logic.
	// By default, the base implementation just calls Alloc().
	virtual void* Reserve(size_t size) { return Alloc(size); }

	const OrtMemoryInfo& Info() const { return memory_info_; };

	// Each implementation of IAllocator can override and provide their own implementation
	virtual void GetStats(AllocatorStats* /stats/) { return; }

	static bool CalcMemSizeForArray(size_t nmemb, size_t size, size_t* out) noexcept {
	return CalcMemSizeForArrayWithAlignment(nmemb, size, 0, out);
	}

	/**
	* Calculate the memory size for an array. The size is bounds checked using SafeInt.
	* \tparam alignment must be power of 2
	* \param nmemb Number of members or elements in the array
	* \param size Size of each element
	* \param out Total size required after any alignment is applied
	* \return true, successful. false, overflow
	*/
	[[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t alignment, size_t* out) noexcept;

	/**
	* https://cwe.mitre.org/data/definitions/190.html
	* \param alignment must be power of 2
	* \param nmemb Number of members or elements in the array
	* \param size Size of each element
	* \param out Total size required after any alignment is applied
	* \return true, successful. false, overflow
	* \remarks This was the original API and was implemented in the header. Replaced with the above version
	* implemented in the .cc file so that the SafeInt dependency is internal.
	*/
	template <size_t alignment>
	[[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t* out) noexcept;

	/**
	* allocate memory for an array which has nmemb items of data, each size bytes long
	*/
	void* AllocArray(size_t nmemb, size_t size) {
	size_t len;
	if (!CalcMemSizeForArray(nmemb, size, &len))
	return nullptr;
	return Alloc(len);
	}

	/**
	* allocate memory for an array which has nmemb items of data, each size bytes long
	*/
	template <size_t alignment>
	void* AllocArrayWithAlignment(size_t nmemb, size_t size) {
	size_t len;
	if (!CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, &len))
	return nullptr;
	return Alloc(len);
	}

	/**
	Create a std::unique_ptr that is allocated and freed by the provided IAllocator.
	@param allocator The allocator.
	@param count_or_bytes The exact bytes to allocate if T is void, otherwise the number of elements to allocate.
	@param use_reserve If true, call Reserve() instead of Alloc() to allocate memory.
	@param stream Which stream instance allocated chunk will be used with.
	@param wait_fn If the allocator want to dynamic reuse a chunk from another stream, use this wait_fn to sync on
	the target stream to make the reuse safe.
	@returns std::unique_ptr with allocated memory and deleter.
	*/
	template <typename T>
	static IAllocatorUniquePtr<T> MakeUniquePtr(std::shared_ptr<IAllocator> allocator, size_t count_or_bytes,
	bool use_reserve = false,
	Stream* stream = nullptr, WaitNotificationFn wait_fn = nullptr) {
	if (allocator == nullptr) return nullptr;
	// for now limit to fundamental types. we could support others, but to do so either we or the caller
	// needs to call the dtor for the objects, for buffers allocated on device we don't have destructor
	// static_assert(std::is_fundamental<T>::value, "Fundamental type required as no destructors are called.");

	size_t alloc_size = count_or_bytes;

	// if T is not void, 'count_or_bytes' == number of items so allow for that
	if constexpr (!std::is_void<T>::value) {
	// sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't
	// reachable if T is void. use std::conditional to 'use' void* in the sizeof call
	if (!CalcMemSizeForArray(
	count_or_bytes, sizeof(typename std::conditional<std::is_void<T>::value, void*, T>::type), &alloc_size)) {
	return nullptr;
	}
	}

	// allocate
	T* p = static_cast<T>(AllocateBufferWithOptions(allocator, alloc_size, use_reserve, stream, std::move(wait_fn)));
	return IAllocatorUniquePtr<T>{
	p,
	[allocator = std::move(allocator)](T* p) { allocator->Free(p); }};
	}

	private:
	OrtMemoryInfo memory_info_;
	};

	template <size_t alignment>
	bool IAllocator::CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t* out) noexcept {
	return CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, out);
	}

	class CPUAllocator : public IAllocator {
	public:
	explicit CPUAllocator(const OrtMemoryInfo& memory_info) : IAllocator(memory_info) {}

	CPUAllocator() : IAllocator(OrtMemoryInfo(CPU, OrtAllocatorType::OrtDeviceAllocator)) {}

	void* Alloc(size_t size) override;
	void Free(void* p) override;
	};

	using AllocatorPtr = std::shared_ptr<IAllocator>;

	void* AllocatorDefaultAlloc(size_t size);
	void AllocatorDefaultFree(void* p);
	} // namespace onnxruntime