Phi2-Fine-Tuning / phivenv /Lib /site-packages /torch /include /c10 /mobile /CPUCachingAllocator.h

Add files using upload-large-folder tool

d1d4335 verified 3 months ago

4.27 kB

	#pragma once

	#include <cstddef>
	#include <mutex>

	#include <c10/macros/Export.h>
	#include <c10/util/SmallVector.h>
	#include <c10/util/flat_hash_map.h>

	/*
	* CPUCachingAllocator:
	* DISCLAIMER:
	* This is subject to change (beta) and only supported on mobile builds.
	* If code snippet such as in 'Usage pattern' is used outside of mobile
	* build you will not observe the intended behavior.
	* See below for more information.
	* Why?
	* It has been observed that some mobile platforms, such as pixel 3, return
	* memory aggressively to the system. This results in page faults in some
	* cases and ends up hurting performance. This caching allocator aims to address
	* that. Furthermore it also allows users to specify their own allocator by
	* implementing allocate/free virtual interfaces.
	* What are the cons?
	* There are some cons that were observed where use of caching allocator led
	* to worse performance on some platforms. Reason being that the caching
	* mechanism used by this allocator left us worse off compared to the
	* corresponding platform's tuned memory allocator. In that case it seemed
	* better to not use this allocator. Note there are some ideas to fix this in
	* the works.
	*
	* Usage pattern:
	* Instantiate and own the caching allocator.
	*   std::unique_ptr<c10::CPUCachingAllocator> caching_allocator =
	*       std::make_unique<c10::CPUCachingAllocator>();
	* Use caching allocator with a scoped guard at inference time. The guard must
	* be a named object: an unnamed temporary would be destroyed at the end of its
	* own statement, i.e. before model.forward runs.
	*   {
	*     c10::WithCPUCachingAllocatorGuard guard(caching_allocator.get());
	*     ... model.forward(...);
	*   }
	*/

	namespace c10 {

	class C10_API CPUCachingAllocator {
	/*
	* What it does:
	* Caches all the allocations carried out by this allocator.
	* Cache key is the size of the allocation.
	* If requested size is found in the cache returns the cached pointer.
	* What it does not do:
	* No speculative allocation for any future allocations.
	*/
	private:
	inline void* allocate_and_cache(const size_t bytes);
	void free_cached();

	protected:
	// Invariants.
	// 1. If memory is ever allocated via this allocator then
	// the pointer will exist in allocation_map_, unless the allocator
	// returned the memory to OS via free_cached.
	// 1.1. Therefore even when the said memory is "freed" via this
	// allocator (and thus cached), it will continue to stay
	// in allocation_map_. Furthermore it will also exist in
	// available_map_. Thus an allocated memory pointer can be in both
	// allocation_map_ and available_map_ simultaneously.
	// 2. Memory pointer maybe removed from allocation_map_, when it
	// is freed outside of the scope of this allocator, but was allocated
	// by this allocator.
	// 3. Available map only contains that memory which was allocated
	// by this allocator and subsequently freed by this allocator.
	// As a result of above invariants, allocated memory ptr cannot be in
	// available_map_ unless it is in allocation_map_ as well.
	ska::flat_hash_map<size_t, c10::SmallVector<void*, 16>> available_map_;
	static ska::flat_hash_map<void*, size_t> allocation_map_;
	// Since allocation_map, which is a global instance, is mutated/read via
	// all public APIs we need a global mutex.
	static std::mutex mutex_;

	public:
	static void record_free(void* ptr);
	virtual ~CPUCachingAllocator();
	// Checks the cache to see if allocation of size bytes can be found.
	// If so return cached memory, else
	// allocates memory, records it for caching and returns.
	virtual void* allocate(const size_t bytes);
	// Checks if the memory being freed is was marked for allocation by
	// an earlier call to allocate. If so cache the allocation.
	// Otherwise free.
	virtual void free(void* ptr);
	};

	// Returns a pointer to a CPUCachingAllocator.
	// NOTE(review): presumably a library-owned default instance; the definition
	// is not visible in this header, so ownership and nullability should be
	// confirmed against the implementation before relying on them.
	CPUCachingAllocator* GetDefaultCPUCachingAllocator();

	// NOTE(review): going by the name, reports whether a caching allocator is
	// currently active for the calling thread (e.g. while a
	// WithCPUCachingAllocatorGuard is alive) -- confirm against the implementation.
	bool ThreadLocalCachingAllocatorEnabled();
	// NOTE(review): going by the name, returns the caching allocator associated
	// with the calling thread; whether it can be nullptr when none is enabled is
	// not visible from this header -- confirm against the implementation.
	CPUCachingAllocator* GetThreadLocalCachingAllocator();

	class C10_API WithCPUCachingAllocatorGuard {
	public:
	WithCPUCachingAllocatorGuard(CPUCachingAllocator* allocator);
	~WithCPUCachingAllocatorGuard();

	private:
	CPUCachingAllocator* prev_caching_allocator_ptr_{nullptr};
	};

	} // namespace c10