#pragma once
// Small header of RAII and error-checking helpers for Huawei Ascend ACL / aclnn.
#include <acl/acl.h>
#include <aclnn/acl_meta.h>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <string>
#include <vector>

// Abort on any failing ACL runtime call, printing the expression and location.
#define ACL_CHECK(x) do { \
    aclError err_ = (x); \
    if (err_ != ACL_SUCCESS) { \
        fprintf(stderr, "ACL error %d at %s:%d : %s\n", err_, __FILE__, __LINE__, #x); \
        std::abort(); \
    } \
} while(0)
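// Usage sketch:
//   ACL_CHECK(aclrtSetDevice(0));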
// Abort on a non-zero aclnnStatus (aclnn APIs return 0 on success), and
// surface the runtime's most recent error text via aclGetRecentErrMsg().
#define ACLNN_CHECK(x) do { \
    aclnnStatus err_ = (x); \
    if (err_ != 0) { \
        const char* msg_ = aclGetRecentErrMsg(); \
        fprintf(stderr, "aclnn error %d at %s:%d : %s\n msg: %s\n", (int)err_, __FILE__, __LINE__, #x, msg_ ? msg_ : "(null)"); \
        std::abort(); \
    } \
} while(0)
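// Usage sketch: the usual two-phase aclnn call, shown here with aclnnAbs.
// `x`, `out`, `ws_ptr`, and `stream` are assumed to exist, and the op header
// (aclnn_abs.h) to be included:
//   uint64_t ws_size = 0; aclOpExecutor* exec = nullptr;
//   ACLNN_CHECK(aclnnAbsGetWorkspaceSize(x, out, &ws_size, &exec));
//   ACLNN_CHECK(aclnnAbs(ws_ptr, ws_size, exec, stream));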
// RAII owners for ACL handle types; each deleter calls the matching aclDestroy*.
struct AclTensorDel { void operator()(aclTensor* t) const { if (t) aclDestroyTensor(t); } };
using AclTensorPtr = std::unique_ptr<aclTensor, AclTensorDel>;

struct AclTensorListDel { void operator()(aclTensorList* t) const { if (t) aclDestroyTensorList(t); } };
using AclTensorListPtr = std::unique_ptr<aclTensorList, AclTensorListDel>;

struct AclIntArrayDel { void operator()(aclIntArray* a) const { if (a) aclDestroyIntArray(a); } };
using AclIntArrayPtr = std::unique_ptr<aclIntArray, AclIntArrayDel>;
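// Usage sketch: the smart pointer owns the handle and destroys it on scope exit:
//   int64_t dims[] = {0, 1};
//   AclIntArrayPtr axes(aclCreateIntArray(dims, 2));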
// Wrap an existing device allocation in an aclTensor view. `stride_elems` is
// given in elements (not bytes), matching aclCreateTensor's expectations.
inline AclTensorPtr make_acl_tensor(void* data, aclDataType dt,
                                    const std::vector<int64_t>& shape,
                                    const std::vector<int64_t>& stride_elems,
                                    aclFormat fmt = ACL_FORMAT_ND) {
    assert(shape.size() == stride_elems.size());
    int64_t n = (int64_t)shape.size();
    // Number of elements spanned by the view: 1 + sum((dim - 1) * stride).
    int64_t storage_len = 1;
    for (int64_t i = 0; i < n; i++) storage_len += (shape[i] - 1) * stride_elems[i];
    // The backing storage is described as a flat 1-D buffer of storage_len elements.
    aclTensor* t = aclCreateTensor(
        shape.data(), (uint64_t)n, dt,
        stride_elems.data(), 0, fmt,
        &storage_len, 1, data);
    return AclTensorPtr(t);
}
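// Usage sketch: a 3x4 row-major buffer viewed as its 4x3 transpose without a
// copy (`dev_ptr` is a stand-in for a valid device pointer):
//   auto view = make_acl_tensor(dev_ptr, ACL_FLOAT, {4, 3}, {1, 4});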
// Row-major (C-contiguous) strides, in elements, for a given shape.
inline std::vector<int64_t> contiguous_strides(const std::vector<int64_t>& shape) {
    int n = (int)shape.size();
    std::vector<int64_t> s(n);
    int64_t acc = 1;
    for (int i = n - 1; i >= 0; --i) {
        s[i] = acc;
        acc *= shape[i];
    }
    return s;
}
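// For example, contiguous_strides({2, 3, 4}) yields {12, 4, 1}.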
// Convenience wrapper: view `data` as a dense row-major tensor of `shape`.
inline AclTensorPtr make_contig_tensor(void* data, aclDataType dt,
                                       const std::vector<int64_t>& shape,
                                       aclFormat fmt = ACL_FORMAT_ND) {
    return make_acl_tensor(data, dt, shape, contiguous_strides(shape), fmt);
}
// Element size in bytes for the dtypes used here; returns 0 for unsupported types.
inline size_t dtype_size(aclDataType dt) {
    switch (dt) {
        case ACL_FLOAT:   return 4;
        case ACL_FLOAT16: return 2;
        case ACL_BF16:    return 2;
        case ACL_INT8:    return 1;
        case ACL_INT32:   return 4;
        case ACL_INT64:   return 8;
        default:          return 0;
    }
}
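// Usage sketch: byte size of a contiguous tensor from its element count:
//   size_t bytes = (size_t)(2 * 3) * dtype_size(ACL_FLOAT16);  // 12 bytes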
// Owning wrapper around a device allocation (aclrtMalloc / aclrtFree).
// Movable but not copyable; the destructor releases the allocation.
struct DeviceBuffer {
    void* ptr = nullptr;
    size_t size = 0;

    DeviceBuffer() = default;
    explicit DeviceBuffer(size_t bytes) { alloc(bytes); }
    ~DeviceBuffer() { if (ptr) aclrtFree(ptr); }
    DeviceBuffer(const DeviceBuffer&) = delete;
    DeviceBuffer& operator=(const DeviceBuffer&) = delete;
    DeviceBuffer(DeviceBuffer&& o) noexcept : ptr(o.ptr), size(o.size) { o.ptr = nullptr; o.size = 0; }
    DeviceBuffer& operator=(DeviceBuffer&& o) noexcept {
        if (this != &o) { if (ptr) aclrtFree(ptr); ptr = o.ptr; size = o.size; o.ptr = nullptr; o.size = 0; }
        return *this;
    }

    // (Re)allocate, releasing any previous allocation first.
    void alloc(size_t bytes) {
        if (ptr) aclrtFree(ptr);
        ACL_CHECK(aclrtMalloc(&ptr, bytes, ACL_MEM_MALLOC_HUGE_FIRST));
        size = bytes;
    }
    void* get() { return ptr; }
    const void* get() const { return ptr; }
};
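// End-to-end usage sketch (assumes aclInit()/aclrtSetDevice() already succeeded):
//   std::vector<float> host(6, 1.0f);
//   DeviceBuffer buf(host.size() * dtype_size(ACL_FLOAT));
//   ACL_CHECK(aclrtMemcpy(buf.get(), buf.size, host.data(),
//                         host.size() * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE));
//   auto t = make_contig_tensor(buf.get(), ACL_FLOAT, {2, 3});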