/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * License); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #pragma once #include #include #include #include #include #include #include #include #include "utils/checker.h" #include "ax_sys_api.h" #include "ax_engine_type.h" #define IO_CMM_ALIGN_SIZE 128 namespace utils { typedef enum { IO_BUFFER_STRATEGY_DEFAULT, IO_BUFFER_STRATEGY_CACHED } IO_BUFFER_STRATEGY_T; static inline AX_S32 query_model_input_size(const AX_ENGINE_IO_INFO_T* io_info, std::array &input_size, AX_IMG_FORMAT_E &eDtype) { int height = 0; int width = 0; int size = 0; int channel = 0; int data_type_size = 0; auto& input = io_info->pInputs[0]; switch (input.eLayout) { case AX_ENGINE_TENSOR_LAYOUT_NHWC: height = input.pShape[1]; width = input.pShape[2]; channel = input.pShape[3]; size = input.nSize; break; case AX_ENGINE_TENSOR_LAYOUT_NCHW: channel = input.pShape[1]; height = input.pShape[2]; width = input.pShape[3]; size = input.nSize; break; default: // NHWC height = input.pShape[1]; width = input.pShape[2]; channel = input.pShape[3]; size = input.nSize; break; } switch (input.eDataType) { case AX_ENGINE_DT_UINT8: case AX_ENGINE_DT_SINT8: data_type_size = 1; break; case AX_ENGINE_DT_UINT16: case AX_ENGINE_DT_SINT16: data_type_size = 2; break; case AX_ENGINE_DT_FLOAT32: data_type_size = 4; break; case AX_ENGINE_DT_SINT32: case AX_ENGINE_DT_UINT32: data_type_size = 4; break; case AX_ENGINE_DT_FLOAT64: data_type_size = 8; break; default: data_type_size = 1; break; } if (channel == 0 || height == 0 || width == 0 || size == 0 || data_type_size == 0) { return -1; } if (input.pExtraMeta) { switch (input.pExtraMeta->eColorSpace) { case AX_ENGINE_CS_BGR: input_size[0] = height; input_size[1] = width; eDtype = AX_FORMAT_BGR888; break; case AX_ENGINE_CS_RGB: input_size[0] = height; input_size[1] = width; eDtype = AX_FORMAT_RGB888; break; case AX_ENGINE_CS_NV12: input_size[0] = height * 2 / 3; input_size[1] = width; eDtype = AX_FORMAT_YUV420_SEMIPLANAR; break; case AX_ENGINE_CS_NV21: input_size[0] = height * 2 / 3; input_size[1] = width; eDtype = AX_FORMAT_YUV420_SEMIPLANAR_VU; break; default: // AX_ENGINE_CS_NV12 input_size[0] = height * 2 / 3; input_size[1] = width; eDtype = AX_FORMAT_YUV420_SEMIPLANAR; break; } } else { input_size[0] = height * 2 / 3; input_size[1] = width; eDtype = AX_FORMAT_YUV420_SEMIPLANAR; } ALOGD("eLayout:%d, eDataType:%d, channel:%d, height:%d, width:%d, size:%d, data_type_size:%d", input.eLayout, input.eDataType, channel, input_size[0], input_size[1], size, data_type_size); return 0; } static inline void brief_io_info(std::string strModel, const AX_ENGINE_IO_INFO_T* io_info) { auto describe_shape_type = [](AX_ENGINE_TENSOR_LAYOUT_T type) -> const char* { switch (type) { case AX_ENGINE_TENSOR_LAYOUT_NHWC: return "NHWC"; case AX_ENGINE_TENSOR_LAYOUT_NCHW: return "NCHW"; default: return "unknown"; } }; auto describe_data_type = [](AX_ENGINE_DATA_TYPE_T type) -> const char* { switch (type) { case AX_ENGINE_DT_UINT8: return "uint8"; case AX_ENGINE_DT_UINT16: return "uint16"; case AX_ENGINE_DT_FLOAT32: return "float32"; case AX_ENGINE_DT_SINT16: return "sint16"; case AX_ENGINE_DT_SINT8: return "sint8"; case AX_ENGINE_DT_SINT32: return "sint32"; case AX_ENGINE_DT_UINT32: return "uint32"; case AX_ENGINE_DT_FLOAT64: return "float64"; case AX_ENGINE_DT_UINT10_PACKED: return "uint10_packed"; case AX_ENGINE_DT_UINT12_PACKED: return "uint12_packed"; case AX_ENGINE_DT_UINT14_PACKED: return "uint14_packed"; case AX_ENGINE_DT_UINT16_PACKED: return "uint16_packed"; default: return "unknown"; } }; auto describe_memory_type = [](AX_ENGINE_MEMORY_TYPE_T type) -> const char* { switch (type) { case AX_ENGINE_MT_PHYSICAL: return "Physical"; case AX_ENGINE_MT_VIRTUAL: return "Virtual"; default: return "unknown"; } }; auto describe_color_space = [](AX_ENGINE_COLOR_SPACE_T cs) -> const char* { switch (cs) { case AX_ENGINE_CS_FEATUREMAP: return "FeatureMap"; case AX_ENGINE_CS_BGR: return "BGR"; case AX_ENGINE_CS_RGB: return "RGB"; case AX_ENGINE_CS_RGBA: return "RGBA"; case AX_ENGINE_CS_GRAY: return "GRAY"; case AX_ENGINE_CS_NV12: return "NV12"; case AX_ENGINE_CS_NV21: return "NV21"; case AX_ENGINE_CS_YUV444: return "YUV444"; case AX_ENGINE_CS_RAW8: return "RAW8"; case AX_ENGINE_CS_RAW10: return "RAW10"; case AX_ENGINE_CS_RAW12: return "RAW12"; case AX_ENGINE_CS_RAW14: return "RAW14"; case AX_ENGINE_CS_RAW16: return "RAW16"; default: return "unknown"; } }; printf("Model Name: %s\n", strModel.c_str()); printf("Max Batch Size %d\n", io_info->nMaxBatchSize); printf("Support Dynamic Batch? %s\n", io_info->bDynamicBatchSize == AX_TRUE ? "Yes" : "No"); for (uint32_t i = 0; i < io_info->nInputSize; ++i) { auto& input = io_info->pInputs[i]; printf("Input[%d]: %s\n", i, input.pName); printf(" Shape ["); for (uint32_t j = 0; j < input.nShapeSize; ++j) { printf("%d", (int)input.pShape[j]); if (j + 1 < input.nShapeSize) printf(", "); } printf("] %s %s %s %s\n", describe_shape_type(input.eLayout), describe_data_type(input.eDataType), input.pExtraMeta ? describe_color_space(input.pExtraMeta->eColorSpace) : "", input.nQuantizationValue > 0 ? ("Q=" + std::to_string(input.nQuantizationValue)).c_str() : ""); printf(" Memory %s\n", describe_memory_type(input.eMemoryType)); printf(" Size %u\n", input.nSize); } for (uint32_t i = 0; i < io_info->nOutputSize; ++i) { auto& output = io_info->pOutputs[i]; printf("Output[%d]: %s\n", i, output.pName); printf(" Shape ["); for (uint32_t j = 0; j < output.nShapeSize; ++j) { printf("%d", (int)output.pShape[j]); if (j + 1 < output.nShapeSize) printf(", "); } printf("] %s %s %s\n", describe_shape_type(output.eLayout), describe_data_type(output.eDataType), output.nQuantizationValue > 0 ? ("Q=" + std::to_string(output.nQuantizationValue)).c_str() : ""); printf(" Memory %s\n", describe_memory_type(output.eMemoryType)); printf(" Size %u\n", output.nSize); } } static inline AX_S32 alloc_engine_buffer(const std::string& token, const std::string& appendix, size_t index, const AX_ENGINE_IOMETA_T* pMeta, AX_ENGINE_IO_BUFFER_T* pBuf, IO_BUFFER_STRATEGY_T eStrategy = IO_BUFFER_STRATEGY_DEFAULT) { AX_S32 ret = -1; if (eStrategy != IO_BUFFER_STRATEGY_DEFAULT && eStrategy != IO_BUFFER_STRATEGY_CACHED) { fprintf(stderr, "strategy %d not supported\n", (int)eStrategy); return -1; } memset(pBuf, 0, sizeof(AX_ENGINE_IO_BUFFER_T)); pBuf->nSize = pMeta->nSize; const std::string token_name = "skel_" + token + appendix + std::to_string(index); if (eStrategy == IO_BUFFER_STRATEGY_CACHED) { ret = AX_SYS_MemAllocCached((AX_U64*)&pBuf->phyAddr, &pBuf->pVirAddr, pBuf->nSize, IO_CMM_ALIGN_SIZE, (const AX_S8*)token_name.c_str()); } else { ret = AX_SYS_MemAlloc((AX_U64*)&pBuf->phyAddr, &pBuf->pVirAddr, pBuf->nSize, IO_CMM_ALIGN_SIZE, (const AX_S8*)token_name.c_str()); } return ret; } static inline AX_S32 free_engine_buffer(AX_ENGINE_IO_BUFFER_T* pBuf) { if (pBuf->phyAddr == 0) { delete[] reinterpret_cast(pBuf->pVirAddr); } else { AX_SYS_MemFree(pBuf->phyAddr, pBuf->pVirAddr); } pBuf->phyAddr = 0; pBuf->pVirAddr = nullptr; return 0; } static inline void free_io_index(AX_ENGINE_IO_BUFFER_T* io_buf, size_t index) { AX_ENGINE_IO_BUFFER_T* pBuf = io_buf + index; free_engine_buffer(pBuf); } static inline void free_io(AX_ENGINE_IO_T &io) { for (size_t j = 0; j < io.nInputSize; ++j) { AX_ENGINE_IO_BUFFER_T *pBuf = io.pInputs + j; AX_SYS_MemFree(pBuf->phyAddr, pBuf->pVirAddr); } for (size_t j = 0; j < io.nOutputSize; ++j) { AX_ENGINE_IO_BUFFER_T *pBuf = io.pOutputs + j; AX_SYS_MemFree(pBuf->phyAddr, pBuf->pVirAddr); } delete[] io.pInputs; delete[] io.pOutputs; } static inline void free_io(AX_ENGINE_IO_T &io, std::vector> &vecOutputBuffer) { if (io.pInputs) { delete[] io.pInputs; io.pInputs = nullptr; } if (io.pOutputs) { for (size_t index = 0; index < vecOutputBuffer.size(); ++index) { AX_ENGINE_IO_BUFFER_T *pOutputs = &vecOutputBuffer[index][0]; for (size_t j = 0; j < io.nOutputSize; ++j) { free_io_index(pOutputs, j); } } delete[] io.pOutputs; io.pOutputs = nullptr; } } static inline int prepare_io(const std::string& token, const AX_ENGINE_IO_INFO_T* info, AX_ENGINE_IO_T &io, IO_BUFFER_STRATEGY_T strategy) { auto ret = 0; memset(&io, 0, sizeof(io)); io.pInputs = new AX_ENGINE_IO_BUFFER_T[info->nInputSize]; if (!io.pInputs) { goto EXIT; } memset(io.pInputs, 0x00, sizeof(AX_ENGINE_IO_BUFFER_T) * info->nInputSize); io.nInputSize = info->nInputSize; for (AX_U32 i = 0; i < info->nInputSize; ++i) { auto meta = info->pInputs[i]; auto buffer = &io.pInputs[i]; ret = alloc_engine_buffer(token, "_input_", i, &meta, buffer, strategy); if (ret != 0) { free_io_index(io.pInputs, i); return ret; } } io.pOutputs = new AX_ENGINE_IO_BUFFER_T[info->nOutputSize]; if (!io.pOutputs) { goto EXIT; } memset(io.pOutputs, 0x00, sizeof(AX_ENGINE_IO_BUFFER_T) * info->nOutputSize); io.nOutputSize = info->nOutputSize; for (size_t i = 0; i < info->nOutputSize; ++i) { auto meta = info->pOutputs[i]; auto buffer = &io.pOutputs[i]; ret = alloc_engine_buffer(token, "_output_", i, &meta, buffer, strategy); if (ret != 0) { goto EXIT; } } EXIT: if (ret != 0) { free_io(io); return -1; } return 0; } static inline int prepare_io(const std::string& token, const AX_ENGINE_IO_INFO_T* info, AX_ENGINE_IO_T &io, std::vector &vecOutputBuffer, const IO_BUFFER_STRATEGY_T &strategy) { AX_S32 ret = 0; memset(&io, 0, sizeof(io)); std::vector outputBuffer; if (1 != info->nInputSize) { fprintf(stderr, "[ERR]: Only single input was accepted(got %u).\n", info->nInputSize); return -1; } io.pInputs = new AX_ENGINE_IO_BUFFER_T[info->nInputSize]; if (!io.pInputs) { goto EXIT; } memset(io.pInputs, 0x00, sizeof(AX_ENGINE_IO_BUFFER_T) * info->nInputSize); io.nInputSize = info->nInputSize; for (AX_U32 i = 0; i < info->nInputSize; ++i) { auto meta = info->pInputs[i]; auto buffer = &io.pInputs[i]; ret = alloc_engine_buffer(token, "_input_", i, &meta, buffer, strategy); if (ret != 0) { free_io_index(io.pInputs, i); return ret; } } io.pOutputs = new AX_ENGINE_IO_BUFFER_T[info->nOutputSize]; if (!io.pOutputs) { goto EXIT; } for (size_t i = 0; i < info->nOutputSize; ++i) { auto meta = info->pOutputs[i]; auto buffer = &io.pOutputs[i]; ret = alloc_engine_buffer(token, "_output_", i, &meta, buffer, strategy); if (ret != 0) { goto EXIT; } vecOutputBuffer.push_back(*buffer); } memset(io.pOutputs, 0x00, sizeof(AX_ENGINE_IO_BUFFER_T) * info->nOutputSize); io.nOutputSize = info->nOutputSize; for (size_t i = 0; i < info->nOutputSize; ++i) { auto buffer = &io.pOutputs[i]; *buffer = vecOutputBuffer[i]; } EXIT: if (ret != 0) { free_io(io); return -1; } return 0; } static inline AX_S32 push_io_output(const AX_ENGINE_IO_INFO_T* info, AX_ENGINE_IO_T& io, std::vector &outputBuffer) { for (size_t i = 0; i < info->nOutputSize; ++i) { auto buffer = &io.pOutputs[i]; *buffer = outputBuffer[i]; } return 0; } static inline AX_S32 cache_io_flush(const AX_ENGINE_IO_BUFFER_T *io_buf) { if (io_buf->phyAddr != 0) { AX_SYS_MflushCache(io_buf->phyAddr, io_buf->pVirAddr, io_buf->nSize); } return 0; } static inline AX_S32 push_io_input(void* input, int index, AX_ENGINE_IO_T& io) { // img ranks_depth ranks_feat ranks_bev, n_points AX_ENGINE_IO_BUFFER_T* pImg = &io.pInputs[index]; memcpy(pImg->pVirAddr, input, pImg->nSize); cache_io_flush(pImg); return 0; } static inline AX_S32 push_io_output(void* output, int index, AX_ENGINE_IO_T& io) { AX_ENGINE_IO_BUFFER_T* pImg = &io.pOutputs[index]; cache_io_flush(pImg); memcpy(output, pImg->pVirAddr, pImg->nSize); return 0; } static inline AX_S32 cpu_copy(AX_U64 nPhyAddrSrc, AX_U64 nPhyAddrDst, AX_U32 nLen) { if (nPhyAddrSrc != 0 && nPhyAddrDst != 0 && nLen > 0) { AX_VOID* pSrcVirAddr = AX_SYS_MmapCache(nPhyAddrSrc, nLen); AX_VOID* pDstVirAddr = AX_SYS_MmapCache(nPhyAddrDst, nLen); memcpy((AX_VOID*)pDstVirAddr, (AX_VOID*)pSrcVirAddr, nLen); AX_SYS_Munmap(pSrcVirAddr, nLen); AX_SYS_Munmap(pDstVirAddr, nLen); return 0; } return -1; } static inline AX_S32 inc_io_ref_cnt(const AX_VIDEO_FRAME_T &stFrame) { if (stFrame.u32BlkId[0] > 0) { AX_POOL_IncreaseRefCnt(stFrame.u32BlkId[0]); } if (stFrame.u32BlkId[1] > 0) { AX_POOL_IncreaseRefCnt(stFrame.u32BlkId[1]); } if (stFrame.u32BlkId[2] > 0) { AX_POOL_IncreaseRefCnt(stFrame.u32BlkId[2]); } return 0; } static inline AX_S32 dec_io_ref_cnt(const AX_VIDEO_FRAME_T &stFrame) { if (stFrame.u32BlkId[0] > 0) { AX_POOL_DecreaseRefCnt(stFrame.u32BlkId[0]); } if (stFrame.u32BlkId[1] > 0) { AX_POOL_DecreaseRefCnt(stFrame.u32BlkId[1]); } if (stFrame.u32BlkId[2] > 0) { AX_POOL_DecreaseRefCnt(stFrame.u32BlkId[2]); } return 0; } static inline bool read_file(const char* path, std::vector& data) { std::fstream fs(path, std::ios::in | std::ios::binary); if (!fs.is_open()) { return false; } fs.seekg(std::ios::end); auto fs_end = fs.tellg(); fs.seekg(std::ios::beg); auto fs_beg = fs.tellg(); auto file_size = static_cast(fs_end - fs_beg); auto vector_size = data.size(); data.reserve(vector_size + file_size); data.insert(data.end(), std::istreambuf_iterator(fs), std::istreambuf_iterator()); fs.close(); return true; } static inline bool read_file(const char* path, AX_VOID **pModelBufferVirAddr, AX_U64 &u64ModelBufferPhyAddr, AX_U32 &nModelBufferSize) { std::fstream fs(path, std::ios::in | std::ios::binary); if (!fs.is_open()) { return false; } fs.seekg(0, std::ios::end); int file_size = fs.tellg(); fs.seekg(0, std::ios::beg); nModelBufferSize = (AX_U32)file_size; AX_SYS_MemAlloc(&u64ModelBufferPhyAddr, pModelBufferVirAddr, nModelBufferSize, 0x100, (AX_S8 *)"SKEL-CV"); if (!pModelBufferVirAddr || (u64ModelBufferPhyAddr == 0)) { return false; } fs.read((AX_CHAR *)*pModelBufferVirAddr, nModelBufferSize); fs.close(); return true; } static inline void dequant(float** pptrOutput, const AX_ENGINE_IOMETA_T& ptrIoInfo, const AX_ENGINE_IO_BUFFER_T& ioBuf, float zp, float scale) { if (ptrIoInfo.eDataType == AX_ENGINE_DT_FLOAT32) { *pptrOutput = (float*)ioBuf.pVirAddr; return; } *pptrOutput = (float*)malloc(ptrIoInfo.nSize * sizeof(float)); uint8_t *pBuf = (uint8_t*)ioBuf.pVirAddr; float* pOutput = *pptrOutput; // float inv_scale = 1.0f / scale; for (int i = 0; i < ptrIoInfo.nSize; i++) { pOutput[i] = ((float)pBuf[i] - zp) * scale; } } }