Spaces:
Build error
Build error
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
| namespace tensorflow { | |
// Attributes for a single allocation call. Different calls to the same
// allocator could potentially have different allocation attributes.
struct AllocationAttributes {
  // If the first attempt to allocate the memory fails, the allocation
  // should return immediately without retrying.
  // An example use case is optional scratch spaces where a failure
  // has only performance impact.
  bool no_retry_on_failure = false;
  // If a Tensor is allocated without the following set to true, then
  // it is logged as an unknown allocation. During execution Tensors
  // should be allocated through the OpKernelContext which records
  // which Op is performing the allocation, and sets this flag to
  // true.
  bool allocation_will_be_logged = false;
};
// Runtime statistics collected by an allocator.
struct AllocatorStats {
  int64 num_allocs;        // Number of allocations.
  int64 bytes_in_use;      // Number of bytes in use.
  int64 max_bytes_in_use;  // The maximum bytes in use.
  int64 max_alloc_size;    // The max single allocation seen.
  // The upper limit what the allocator can allocate, if such a limit
  // is known. Certain allocator may return 0 to indicate the limit is
  // unknown.
  int64 bytes_limit;

  // All counters start zeroed via Clear() (defined out-of-line).
  AllocatorStats() { Clear(); }

  // Resets every counter to its initial value.
  void Clear();
  // Human-readable summary of the statistics, for logging/debugging.
  string DebugString() const;
};
| // Allocator is an abstract interface for allocating and deallocating | |
| // device memory. | |
| class Allocator { | |
| public: | |
| // Align to 64 byte boundary. | |
| static constexpr size_t kAllocatorAlignment = 64; | |
| // Align to 32 byte boundary. | |
| static constexpr size_t kAllocatorAlignment = 32; | |
| virtual ~Allocator(); | |
| // Return a string identifying this allocator | |
| virtual string Name() = 0; | |
| // Return an uninitialized block of memory that is "num_bytes" bytes | |
| // in size. The returned pointer is guaranteed to be aligned to a | |
| // multiple of "alignment" bytes. | |
| // REQUIRES: "alignment" is a power of 2. | |
| virtual void* AllocateRaw(size_t alignment, size_t num_bytes) = 0; | |
| // Return an uninitialized block of memory that is "num_bytes" bytes | |
| // in size with specified allocation attributes. The returned pointer is | |
| // guaranteed to be aligned to a multiple of "alignment" bytes. | |
| // REQUIRES: "alignment" is a power of 2. | |
| virtual void* AllocateRaw(size_t alignment, size_t num_bytes, | |
| const AllocationAttributes& allocation_attr) { | |
| // The default behavior is to use the implementation without any allocation | |
| // attributes. | |
| return AllocateRaw(alignment, num_bytes); | |
| } | |
| // Deallocate a block of memory pointer to by "ptr" | |
| // REQUIRES: "ptr" was previously returned by a call to AllocateRaw | |
| virtual void DeallocateRaw(void* ptr) = 0; | |
| // Convenience functions to do typed allocation. C++ constructors | |
| // and destructors are invoked for complex types if necessary, | |
| // depending on the concrete Allocator implementation. May return | |
| // NULL if the tensor has too many elements to represent in a single | |
| // allocation. | |
| template <typename T> | |
| T* Allocate(size_t num_elements) { | |
| return Allocate<T>(num_elements, AllocationAttributes()); | |
| } | |
| template <typename T> | |
| T* Allocate(size_t num_elements, | |
| const AllocationAttributes& allocation_attr) { | |
| // TODO(jeff): Do we need to allow clients to pass in alignment | |
| // requirements? | |
| if (num_elements > (std::numeric_limits<size_t>::max() / sizeof(T))) { | |
| return NULL; | |
| } | |
| void* p = AllocateRaw(kAllocatorAlignment, sizeof(T) * num_elements, | |
| allocation_attr); | |
| T* typed_p = reinterpret_cast<T*>(p); | |
| if (typed_p) RunCtor<T>(typed_p, num_elements); | |
| return typed_p; | |
| } | |
| template <typename T> | |
| void Deallocate(T* ptr, size_t num_elements) { | |
| if (ptr) { | |
| RunDtor<T>(ptr, num_elements); | |
| DeallocateRaw(ptr); | |
| } | |
| } | |
| // Returns true if this allocator tracks the sizes of allocations. | |
| // RequestedSize and AllocatedSize must be overridden if | |
| // TracksAllocationSizes is overridden to return true. | |
| virtual bool TracksAllocationSizes() { return false; } | |
| // Returns true if this allocator requires tensors with 0 elements | |
| // to allocate buffers. This is false for most allocators, but may | |
| // be used by special-case allocators that want to track tensor | |
| // usage. | |
| virtual bool ShouldAllocateEmptyTensors() { return false; } | |
| // Returns the user-requested size of the data allocated at | |
| // 'ptr'. Note that the actual buffer allocated might be larger | |
| // than requested, but this function returns the size requested by | |
| // the user. | |
| // | |
| // REQUIRES: TracksAllocationSizes() is true. | |
| // | |
| // REQUIRES: 'ptr!=nullptr' and points to a buffer previously | |
| // allocated by this allocator. | |
| virtual size_t RequestedSize(void* ptr) { | |
| CHECK(false) << "allocator doesn't track sizes"; | |
| return size_t(0); | |
| } | |
| // Returns the allocated size of the buffer at 'ptr' if known, | |
| // otherwise returns RequestedSize(ptr). AllocatedSize(ptr) is | |
| // guaranteed to be >= RequestedSize(ptr). | |
| // | |
| // REQUIRES: TracksAllocationSizes() is true. | |
| // | |
| // REQUIRES: 'ptr!=nullptr' and points to a buffer previously | |
| // allocated by this allocator. | |
| virtual size_t AllocatedSize(void* ptr) { return RequestedSize(ptr); } | |
| // Returns either 0 or an identifier assigned to the buffer at 'ptr' | |
| // when the buffer was returned by AllocateRaw. If non-zero, the | |
| // identifier differs from every other ID assigned by this | |
| // allocator. | |
| // | |
| // REQUIRES: TracksAllocationSizes() is true. | |
| // | |
| // REQUIRES: 'ptr!=nullptr' and points to a buffer previously | |
| // allocated by this allocator. | |
| virtual int64 AllocationId(void* ptr) { return 0; } | |
| // Returns the allocated size of the buffer at 'ptr' if known, | |
| // otherwise returns 0. This method can be called when | |
| // TracksAllocationSizes() is false, but can be extremely slow. | |
| // | |
| // REQUIRES: 'ptr!=nullptr' and points to a buffer previously | |
| // allocated by this allocator. | |
| virtual size_t AllocatedSizeSlow(void* ptr) { | |
| if (TracksAllocationSizes()) { | |
| return AllocatedSize(ptr); | |
| } | |
| return 0; | |
| } | |
| // Fills in 'stats' with statistics collected by this allocator. | |
| virtual void GetStats(AllocatorStats* stats) { stats->Clear(); } | |
| private: | |
| // No constructors or destructors are run for simple types | |
| template <typename T> | |
| void RunCtor(T* p, size_t n) { | |
| static_assert(is_simple_type<T>::value, "T is not a simple type."); | |
| } | |
| template <typename T> | |
| void RunDtor(T* p, size_t n) {} | |
| // custom constructors and destructors that can be overridden for | |
| // non-standard allocators | |
| // Runs string's default constructor for p[0], p[1], ..., p[n-1]. | |
| virtual void RunStringCtor(string* p, size_t n) { | |
| for (size_t i = 0; i < n; ++p, ++i) new (p) string(); | |
| } | |
| // Runs string's default destructor for p[0], p[1], ..., p[n-1]. | |
| virtual void RunStringDtor(string* p, size_t n) { | |
| for (size_t i = 0; i < n; ++p, ++i) p->~string(); | |
| } | |
| virtual void RunResourceCtor(ResourceHandle* p, size_t n) { | |
| for (size_t i = 0; i < n; ++p, ++i) new (p) ResourceHandle(); | |
| } | |
| // Runs string's default destructor for p[0], p[1], ..., p[n-1]. | |
| virtual void RunResourceDtor(ResourceHandle* p, size_t n) { | |
| for (size_t i = 0; i < n; ++p, ++i) p->~ResourceHandle(); | |
| } | |
| virtual void RunVariantCtor(Variant* p, size_t n) { | |
| for (size_t i = 0; i < n; ++p, ++i) new (p) Variant(); | |
| } | |
| virtual void RunVariantDtor(Variant* p, size_t n) { | |
| for (size_t i = 0; i < n; ++p, ++i) p->~Variant(); | |
| } | |
| // TODO(jeff): Maybe provide some interface to give info about | |
| // current allocation state (total number of bytes available for | |
| // allocation, number of bytes free on device, etc.) | |
| }; | |
// Allocator-specific constructors and destructors are used for
// strings: these explicit specializations of RunCtor/RunDtor route
// string, ResourceHandle, and Variant elements through the virtual
// Run*Ctor/Run*Dtor hooks so concrete allocators can customize
// element construction and destruction.
template <>
inline void Allocator::RunCtor(string* p, size_t n) {
  RunStringCtor(p, n);
}

template <>
inline void Allocator::RunDtor(string* p, size_t n) {
  RunStringDtor(p, n);
}

template <>
inline void Allocator::RunCtor(ResourceHandle* p, size_t n) {
  RunResourceCtor(p, n);
}

template <>
inline void Allocator::RunDtor(ResourceHandle* p, size_t n) {
  RunResourceDtor(p, n);
}

template <>
inline void Allocator::RunCtor(Variant* p, size_t n) {
  RunVariantCtor(p, n);
}

template <>
inline void Allocator::RunDtor(Variant* p, size_t n) {
  RunVariantDtor(p, n);
}
| // An implementation of Allocator that delegates all calls to another Allocator. | |
| // | |
| // Useful to clients who want to override part of the functionality of another | |
| // allocator. | |
| class AllocatorWrapper : public Allocator { | |
| public: | |
| explicit AllocatorWrapper(Allocator* wrapped) : wrapped_(wrapped) {} | |
| ~AllocatorWrapper() override {} | |
| // Returns the wrapped allocator to which all calls are delegated. | |
| Allocator* wrapped() const { return wrapped_; } | |
| string Name() override { return wrapped_->Name(); } | |
| void* AllocateRaw(size_t alignment, size_t num_bytes) override { | |
| return wrapped_->AllocateRaw(alignment, num_bytes); | |
| } | |
| void* AllocateRaw(size_t alignment, size_t num_bytes, | |
| const AllocationAttributes& allocation_attr) override { | |
| return wrapped_->AllocateRaw(alignment, num_bytes, allocation_attr); | |
| } | |
| void DeallocateRaw(void* ptr) override { wrapped_->DeallocateRaw(ptr); } | |
| bool TracksAllocationSizes() override { | |
| return wrapped_->TracksAllocationSizes(); | |
| } | |
| bool ShouldAllocateEmptyTensors() override { | |
| return wrapped_->TracksAllocationSizes(); | |
| } | |
| size_t RequestedSize(void* ptr) override { | |
| return wrapped_->RequestedSize(ptr); | |
| } | |
| size_t AllocatedSize(void* ptr) override { | |
| return wrapped_->AllocatedSize(ptr); | |
| } | |
| int64 AllocationId(void* ptr) override { return wrapped_->AllocationId(ptr); } | |
| size_t AllocatedSizeSlow(void* ptr) override { | |
| return wrapped_->AllocatedSizeSlow(ptr); | |
| } | |
| private: | |
| Allocator* const wrapped_; | |
| }; | |
| // A tensorflow Op may need access to different kinds of memory that | |
| // are not simply a function of the device to which the Op has been | |
| // assigned. For example, an Op executing on a GPU may still need | |
| // to allocate CPU RAM for some purpose. Internal to the tensorflow | |
| // runtime we may choose to allocate CPU ram from special regions | |
| // that have been prepared for higher performance in some use | |
| // contexts, e.g. doing DMA with particular devices. For these | |
| // reasons, the Device interface does not expose just one memory | |
| // Allocator, but instead provides an accessor that takes a | |
| // specification of the desired memory attributes in order to select | |
| // an Allocator. | |
| // | |
| // Example use: | |
| // // Allocator for ordinary device memory: | |
| // Allocator* a = allocator(AllocatorAttributes()); | |
| // ... | |
| // // Allocator for CPU RAM, regardless of where Op is executing: | |
| // AllocatorAttributes attr; | |
| // attr.set_on_host(true); | |
| // Allocator* a = allocator(attr); | |
| struct AllocatorAttributes { | |
| void set_on_host(bool v) { value |= (static_cast<int>(v)); } | |
| bool on_host() const { return value & 0x1; } | |
| void set_nic_compatible(bool v) { value |= (static_cast<int>(v) << 1); } | |
| bool nic_compatible() const { return value & (0x1 << 1); } | |
| void set_gpu_compatible(bool v) { value |= (static_cast<int>(v) << 2); } | |
| bool gpu_compatible() const { return value & (0x1 << 2); } | |
| void Merge(AllocatorAttributes other) { value |= other.value; } | |
| // Returns true if the fields set in *this is a subset of or equal to | |
| // those set in other. | |
| bool IsEqualOrLessRestrictiveThan(const AllocatorAttributes& other) const { | |
| return (value | other.value) == other.value; | |
| } | |
| // NOTE: The upper 8 bits of the value are reserved for | |
| // device-specific uses. Implementors of a device can interpret these | |
| // upper 8 bits in device-specific ways, and ops implemented for those | |
| // devices are responsible for setting those 8 bits appropriately. | |
| uint32 value = 0; | |
| }; | |
// Returns a trivial implementation of Allocator which uses the system
// default malloc. The returned allocator is a process singleton, so
// callers should not delete it.
Allocator* cpu_allocator();

// If 'enable' is true, the process-wide cpu allocator collects
// AllocatorStats. By default, it's disabled.
void EnableCPUAllocatorStats(bool enable);

// If 'enable' is true, the process-wide cpu allocator collects full
// statistics. By default, it's disabled.
void EnableCPUAllocatorFullStats(bool enable);
// Abstract interface of an object that does the underlying suballoc/free of
// memory for a higher-level allocator.
class SubAllocator {
 public:
  virtual ~SubAllocator() {}
  // Returns a block of "num_bytes" bytes aligned to "alignment".
  virtual void* Alloc(size_t alignment, size_t num_bytes) = 0;
  // Frees "ptr". The signature implies "num_bytes" should match the size
  // passed to the Alloc call that returned "ptr" — confirm with callers.
  virtual void Free(void* ptr, size_t num_bytes) = 0;
};
| } // namespace tensorflow | |